diff mbox

[RFC,2/3] tcg: Add support for fence generation in x86 backend

Message ID 20160524171856.1000-3-bobby.prani@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Pranith Kumar May 24, 2016, 5:18 p.m. UTC
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/i386/tcg-target.h     | 1 +
 tcg/i386/tcg-target.inc.c | 9 +++++++++
 tcg/tcg-opc.h             | 2 +-
 tcg/tcg.c                 | 1 +
 4 files changed, 12 insertions(+), 1 deletion(-)

Comments

Richard Henderson May 25, 2016, 5:35 p.m. UTC | #1
On 05/24/2016 10:18 AM, Pranith Kumar wrote:
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  tcg/i386/tcg-target.h     | 1 +
>  tcg/i386/tcg-target.inc.c | 9 +++++++++
>  tcg/tcg-opc.h             | 2 +-
>  tcg/tcg.c                 | 1 +
>  4 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 92be341..93ea42e 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -100,6 +100,7 @@ extern bool have_bmi1;
>  #define TCG_TARGET_HAS_muls2_i32        1
>  #define TCG_TARGET_HAS_muluh_i32        0
>  #define TCG_TARGET_HAS_mulsh_i32        0
> +#define TCG_TARGET_HAS_fence            1

This has to be defined for all hosts.

The default implementation should be a function call into tcg-runtime.c that 
calls smp_mb().

> @@ -347,6 +347,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
>  #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
>  #define OPC_TESTL	(0x85)
>  #define OPC_XCHG_ax_r32	(0x90)
> +#define OPC_MFENCE      (0xAE | P_EXT)
>
>  #define OPC_GRP3_Ev	(0xf7)
>  #define OPC_GRP5	(0xff)
> @@ -686,6 +687,14 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
>      }
>  }
>
> +static inline void tcg_out_fence(TCGContext *s)
> +{
> +    /* TODO: Figure out an appropriate place for the encoding */
> +    tcg_out8(s, 0x0F);
> +    tcg_out8(s, 0xAE);
> +    tcg_out8(s, 0xF0);
> +}

Why define OPC_MFENCE if you're not going to use it?  Of course, it's not 
exactly a complete and useful definition, so maybe just delete OPC_MFENCE.

Also, for 32-bit you need to check for sse2 before outputting this.  See also 
the existing cpuid checks in tcg_target_init and the fallback smp_mb definition 
for pre-gcc-4.4.


r~
Alex Bennée May 25, 2016, 7:25 p.m. UTC | #2
Richard Henderson <rth@twiddle.net> writes:

> On 05/24/2016 10:18 AM, Pranith Kumar wrote:
>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>> ---
>>  tcg/i386/tcg-target.h     | 1 +
>>  tcg/i386/tcg-target.inc.c | 9 +++++++++
>>  tcg/tcg-opc.h             | 2 +-
>>  tcg/tcg.c                 | 1 +
>>  4 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
>> index 92be341..93ea42e 100644
>> --- a/tcg/i386/tcg-target.h
>> +++ b/tcg/i386/tcg-target.h
>> @@ -100,6 +100,7 @@ extern bool have_bmi1;
>>  #define TCG_TARGET_HAS_muls2_i32        1
>>  #define TCG_TARGET_HAS_muluh_i32        0
>>  #define TCG_TARGET_HAS_mulsh_i32        0
>> +#define TCG_TARGET_HAS_fence            1
>
> This has to be defined for all hosts.
>
> The default implementation should be a function call into tcg-runtime.c that
> calls smp_mb().

That would solve the problem of converting the various backends
piecemeal - although obviously we should move to all backends having
"native" support ASAP. However by introducing expensive substitute
functions we will slow down the translations as each front end is
expanded to translate the target barrier ops.

Should we make the emitting of the function call/TCGop conditional on
MTTCG being enabled? If we are running in round-robin mode there is no
need to issue any fence operations.

>
>> @@ -347,6 +347,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
>>  #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
>>  #define OPC_TESTL	(0x85)
>>  #define OPC_XCHG_ax_r32	(0x90)
>> +#define OPC_MFENCE      (0xAE | P_EXT)
>>
>>  #define OPC_GRP3_Ev	(0xf7)
>>  #define OPC_GRP5	(0xff)
>> @@ -686,6 +687,14 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
>>      }
>>  }
>>
>> +static inline void tcg_out_fence(TCGContext *s)
>> +{
>> +    /* TODO: Figure out an appropriate place for the encoding */
>> +    tcg_out8(s, 0x0F);
>> +    tcg_out8(s, 0xAE);
>> +    tcg_out8(s, 0xF0);
>> +}
>
> Why define OPC_MFENCE if you're not going to use it?  Of course, it's not
> exactly a complete and useful definition, so maybe just delete OPC_MFENCE.
>
> Also, for 32-bit you need to check for sse2 before outputting this.  See also
> the existing cpuid checks in tcg_target_init and the fallback smp_mb definition
> for pre-gcc-4.4.
>
>
> r~


--
Alex Bennée
Sergey Fedorov May 25, 2016, 7:43 p.m. UTC | #3
On 25/05/16 22:25, Alex Bennée wrote:
> Richard Henderson <rth@twiddle.net> writes:
>> On 05/24/2016 10:18 AM, Pranith Kumar wrote:
>>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>>> ---
>>>  tcg/i386/tcg-target.h     | 1 +
>>>  tcg/i386/tcg-target.inc.c | 9 +++++++++
>>>  tcg/tcg-opc.h             | 2 +-
>>>  tcg/tcg.c                 | 1 +
>>>  4 files changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
>>> index 92be341..93ea42e 100644
>>> --- a/tcg/i386/tcg-target.h
>>> +++ b/tcg/i386/tcg-target.h
>>> @@ -100,6 +100,7 @@ extern bool have_bmi1;
>>>  #define TCG_TARGET_HAS_muls2_i32        1
>>>  #define TCG_TARGET_HAS_muluh_i32        0
>>>  #define TCG_TARGET_HAS_mulsh_i32        0
>>> +#define TCG_TARGET_HAS_fence            1
>> This has to be defined for all hosts.
>>
>> The default implementation should be a function call into tcg-runtime.c that
>> calls smp_mb().
> That would solve the problem of converting the various backends
> piecemeal - although obviously we should move to all backends having
> "native" support ASAP. However by introducing expensive substitute
> functions we will slow down the translations as each front end is
> expanded to translate the target barrier ops.

I think it would be better not to defer native support for the operation.
It should be a relatively simple instruction. Otherwise we could wind up
deferring this indefinitely.

> Should we make the emitting of the function call/TCGop conditional on
> MTTCG being enabled? If we are running in round-robin mode there is no
> need to issue any fence operations.

Good idea.

Kind regards,
Sergey
Richard Henderson May 25, 2016, 7:50 p.m. UTC | #4
On 05/25/2016 12:25 PM, Alex Bennée wrote:
> That would solve the problem of converting the various backends
> piecemeal - although obviously we should move to all backends having
> "native" support ASAP. However by introducing expensive substitute
> functions we will slow down the translations as each front end is
> expanded to translate the target barrier ops.

Obviously.  We could in fact do that all up front if desired.  It doesn't take 
long to look up the barrier instructions for each isa.


> Should we make the emitting of the function call/TCGop conditional on
> MTTCG being enabled? If we are running in round-robin mode there is no
> need to issue any fence operations.

Probably.  But to keep the translators clean we should probably hide that 
within tcg_gen_fence().


r~
Pranith Kumar May 25, 2016, 7:56 p.m. UTC | #5
Hi Richard,

Thank you for the helpful comments.

On Wed, May 25, 2016 at 1:35 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 05/24/2016 10:18 AM, Pranith Kumar wrote:
>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
>> index 92be341..93ea42e 100644
>> --- a/tcg/i386/tcg-target.h
>> +++ b/tcg/i386/tcg-target.h
>> @@ -100,6 +100,7 @@ extern bool have_bmi1;
>>  #define TCG_TARGET_HAS_muls2_i32        1
>>  #define TCG_TARGET_HAS_muluh_i32        0
>>  #define TCG_TARGET_HAS_mulsh_i32        0
>> +#define TCG_TARGET_HAS_fence            1
>
>
> This has to be defined for all hosts.

OK. I will add an entry in tcg.h with a default of 0 and override it in
each individual architecture once it is implemented.

>> @@ -347,6 +347,7 @@ static inline int
>> tcg_target_const_match(tcg_target_long val, TCGType type,
>>  #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
>>  #define OPC_TESTL      (0x85)
>>  #define OPC_XCHG_ax_r32        (0x90)
>> +#define OPC_MFENCE      (0xAE | P_EXT)
>
> Why define OPC_MFENCE if you're not going to use it?  Of course, it's not
> exactly a complete and useful definition, so maybe just delete OPC_MFENCE.

I want to use OPC_MFENCE instead of hard-coding the value in
tcg_out_fence(), but as you said the definition is not complete (it
currently generates only 0x0FAE). I am trying to figure out how to
generate 0x0FAEF0 using the definition.

>
> Also, for 32-bit you need to check for sse2 before outputting this.  See
> also the existing cpuid checks in tcg_target_init and the fallback smp_mb
> definition for pre-gcc-4.4.

OK, I'll check the current code and do something similar.

Thanks,
Pranith Kumar May 25, 2016, 7:57 p.m. UTC | #6
On Wed, May 25, 2016 at 3:25 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
> Should we make the emitting of the function call/TCGop conditional on
> MTTCG being enabled? If we are running in round-robin mode there is no
> need to issue any fence operations.
>

Also, we should check if SMP (> 1 processors) is enabled, since fences
are not necessary on UP systems.
Pranith Kumar May 25, 2016, 7:59 p.m. UTC | #7
On Wed, May 25, 2016 at 3:43 PM, Sergey Fedorov <serge.fdrv@gmail.com> wrote:
>
> I think it would be better not to defer native support for the operation.
> It should be a relatively simple instruction. Otherwise we could wind up
> deferring this indefinitely.
>

Agreed. I will go with the native generation for now.

Thanks,
Sergey Fedorov May 25, 2016, 8:02 p.m. UTC | #8
On 25/05/16 22:59, Pranith Kumar wrote:
> On Wed, May 25, 2016 at 3:43 PM, Sergey Fedorov <serge.fdrv@gmail.com> wrote:
>> I think it would be better not to defer native support for the operation.
>> It should be a relatively simple instruction. Otherwise we could wind up
>> deferring this indefinitely.
>>
> Agreed. I will go with the native generation for now.

I mean we'd better implement native support for all the supported host
architectures right away.

Kind regards,
Sergey
Alex Bennée May 26, 2016, 4:09 p.m. UTC | #9
Pranith Kumar <bobby.prani@gmail.com> writes:

> Hi Richard,
>
> Thank you for the helpful comments.
>
> On Wed, May 25, 2016 at 1:35 PM, Richard Henderson <rth@twiddle.net> wrote:
>> On 05/24/2016 10:18 AM, Pranith Kumar wrote:
>>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
>>> index 92be341..93ea42e 100644
>>> --- a/tcg/i386/tcg-target.h
>>> +++ b/tcg/i386/tcg-target.h
>>> @@ -100,6 +100,7 @@ extern bool have_bmi1;
>>>  #define TCG_TARGET_HAS_muls2_i32        1
>>>  #define TCG_TARGET_HAS_muluh_i32        0
>>>  #define TCG_TARGET_HAS_mulsh_i32        0
>>> +#define TCG_TARGET_HAS_fence            1
>>
>>
>> This has to be defined for all hosts.
>
> OK. I will add an entry in tcg.h with default 0 and override in
> individual architecture once it is implemented.
>
>>> @@ -347,6 +347,7 @@ static inline int
>>> tcg_target_const_match(tcg_target_long val, TCGType type,
>>>  #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
>>>  #define OPC_TESTL      (0x85)
>>>  #define OPC_XCHG_ax_r32        (0x90)
>>> +#define OPC_MFENCE      (0xAE | P_EXT)
>>
>> Why define OPC_MFENCE if you're not going to use it?  Of course, it's not
>> exactly a complete and useful definition, so maybe just delete OPC_MFENCE.
>
> I want to use OPC_MFENCE instead of hard-coding the value in
> tcg_out_fence(), but as you said the definition is not complete (it
> currently generates only 0x0FAE). I am trying to figure out how to
> generate 0x0FAEF0 using the definition.

I think you're going to have to just use tcg_out_fence(), since
tcg_out_opc() does black magic with extra flag bits and assumes it is
encoding rx registers (at least in 64-bit mode).

However I would suggest a comment and maybe breakdown of the different
fence types you can emit. Will pre-P4 processors never need mfences?

>
>>
>> Also, for 32-bit you need to check for sse2 before outputting this.  See
>> also the existing cpuid checks in tcg_target_init and the fallback smp_mb
>> definition for pre-gcc-4.4.
>
> OK, I'll check the current code and do something similar.
>
> Thanks,


--
Alex Bennée
diff mbox

Patch

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 92be341..93ea42e 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -100,6 +100,7 @@  extern bool have_bmi1;
 #define TCG_TARGET_HAS_muls2_i32        1
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
+#define TCG_TARGET_HAS_fence            1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_extrl_i64_i32    0
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 238fa10..cf49272 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -347,6 +347,7 @@  static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
 #define OPC_TESTL	(0x85)
 #define OPC_XCHG_ax_r32	(0x90)
+#define OPC_MFENCE      (0xAE | P_EXT)
 
 #define OPC_GRP3_Ev	(0xf7)
 #define OPC_GRP5	(0xff)
@@ -686,6 +687,14 @@  static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
     }
 }
 
+static inline void tcg_out_fence(TCGContext *s)
+{
+    /* Raw MFENCE (0F AE F0). TODO: find a proper place for the encoding */
+    tcg_out8(s, 0x0F);
+    tcg_out8(s, 0xAE);
+    tcg_out8(s, 0xF0);
+}
+
 static inline void tcg_out_push(TCGContext *s, int reg)
 {
     tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 4696cf1..b772d90 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,7 +42,7 @@  DEF(br, 0, 0, 1, TCG_OPF_BB_END)
 # define IMPL64  TCG_OPF_64BIT
 #endif
 
-DEF(fence, 0, 0, 0, 0)
+DEF(fence, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
 
 DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
 DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b5a22ba..461a33e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2444,6 +2444,7 @@  int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
                                dead_args, sync_args);
             break;
         case INDEX_op_fence:
+            tcg_out_fence(s);
             break;
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */