Message ID | 20160524171856.1000-3-bobby.prani@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 05/24/2016 10:18 AM, Pranith Kumar wrote: > Signed-off-by: Pranith Kumar <bobby.prani@gmail.com> > --- > tcg/i386/tcg-target.h | 1 + > tcg/i386/tcg-target.inc.c | 9 +++++++++ > tcg/tcg-opc.h | 2 +- > tcg/tcg.c | 1 + > 4 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h > index 92be341..93ea42e 100644 > --- a/tcg/i386/tcg-target.h > +++ b/tcg/i386/tcg-target.h > @@ -100,6 +100,7 @@ extern bool have_bmi1; > #define TCG_TARGET_HAS_muls2_i32 1 > #define TCG_TARGET_HAS_muluh_i32 0 > #define TCG_TARGET_HAS_mulsh_i32 0 > +#define TCG_TARGET_HAS_fence 1 This has to be defined for all hosts. The default implementation should be a function call into tcg-runtime.c that calls smp_mb(). > @@ -347,6 +347,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, > #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) > #define OPC_TESTL (0x85) > #define OPC_XCHG_ax_r32 (0x90) > +#define OPC_MFENCE (0xAE | P_EXT) > > #define OPC_GRP3_Ev (0xf7) > #define OPC_GRP5 (0xff) > @@ -686,6 +687,14 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) > } > } > > +static inline void tcg_out_fence(TCGContext *s) > +{ > + /* TODO: Figure out an appropriate place for the encoding */ > + tcg_out8(s, 0x0F); > + tcg_out8(s, 0xAE); > + tcg_out8(s, 0xF0); > +} Why define OPC_MFENCE if you're not going to use it? Of course, it's not exactly a complete and useful definition, so maybe just delete OPC_MFENCE. Also, for 32-bit you need to check for sse2 before outputting this. See also the existing cpuid checks in tcg_target_init and the fallback smp_mb definition for pre-gcc-4.4. r~
Richard Henderson <rth@twiddle.net> writes: > On 05/24/2016 10:18 AM, Pranith Kumar wrote: >> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com> >> --- >> tcg/i386/tcg-target.h | 1 + >> tcg/i386/tcg-target.inc.c | 9 +++++++++ >> tcg/tcg-opc.h | 2 +- >> tcg/tcg.c | 1 + >> 4 files changed, 12 insertions(+), 1 deletion(-) >> >> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h >> index 92be341..93ea42e 100644 >> --- a/tcg/i386/tcg-target.h >> +++ b/tcg/i386/tcg-target.h >> @@ -100,6 +100,7 @@ extern bool have_bmi1; >> #define TCG_TARGET_HAS_muls2_i32 1 >> #define TCG_TARGET_HAS_muluh_i32 0 >> #define TCG_TARGET_HAS_mulsh_i32 0 >> +#define TCG_TARGET_HAS_fence 1 > > This has to be defined for all hosts. > > The default implementation should be a function call into tcg-runtime.c that > calls smp_mb(). That would solve the problem of converting the various backends piecemeal - although obviously we should move to all backends having "native" support ASAP. However, by introducing expensive substitute functions we will slow down the translations as each front end is expanded to translate the target barrier ops. Should we make the emitting of the function call/TCGop conditional on MTTCG being enabled? If we are running in round-robin mode there is no need to issue any fence operations. > >> @@ -347,6 +347,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, >> #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) >> #define OPC_TESTL (0x85) >> #define OPC_XCHG_ax_r32 (0x90) >> +#define OPC_MFENCE (0xAE | P_EXT) >> >> #define OPC_GRP3_Ev (0xf7) >> #define OPC_GRP5 (0xff) >> @@ -686,6 +687,14 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) >> } >> } >> >> +static inline void tcg_out_fence(TCGContext *s) >> +{ >> + /* TODO: Figure out an appropriate place for the encoding */ >> + tcg_out8(s, 0x0F); >> + tcg_out8(s, 0xAE); >> + tcg_out8(s, 0xF0); >> +} > > Why define OPC_MFENCE if you're not going to use it? 
Of course, it's not > exactly a complete and useful definition, so maybe just delete OPC_MFENCE. > > Also, for 32-bit you need to check for sse2 before outputting this. See also > the existing cpuid checks in tcg_target_init and the fallback smp_mb definition > for pre-gcc-4.4. > > > r~ -- Alex Bennée
On 25/05/16 22:25, Alex Bennée wrote: > Richard Henderson <rth@twiddle.net> writes: >> On 05/24/2016 10:18 AM, Pranith Kumar wrote: >>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com> >>> --- >>> tcg/i386/tcg-target.h | 1 + >>> tcg/i386/tcg-target.inc.c | 9 +++++++++ >>> tcg/tcg-opc.h | 2 +- >>> tcg/tcg.c | 1 + >>> 4 files changed, 12 insertions(+), 1 deletion(-) >>> >>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h >>> index 92be341..93ea42e 100644 >>> --- a/tcg/i386/tcg-target.h >>> +++ b/tcg/i386/tcg-target.h >>> @@ -100,6 +100,7 @@ extern bool have_bmi1; >>> #define TCG_TARGET_HAS_muls2_i32 1 >>> #define TCG_TARGET_HAS_muluh_i32 0 >>> #define TCG_TARGET_HAS_mulsh_i32 0 >>> +#define TCG_TARGET_HAS_fence 1 >> This has to be defined for all hosts. >> >> The default implementation should be a function call into tcg-runtime.c that >> calls smp_mb(). > That would solves the problem of converting the various backends > piecemeal - although obviously we should move to all backends having > "native" support ASAP. However by introducing expensive substitute > functions we will slow down the translations as each front end is > expanded to translate the target barrier ops. I think it would be better not to defer native support for the operation. It should be a relatively simple instruction. Otherwise we could wind up deferring this indefinitely. > Should we make the emitting of the function call/TCGop conditional on > MTTCG being enabled? If we are running in round-robin mode there is no > need to issue any fence operations. Good idea. Kind regards, Sergey
On 05/25/2016 12:25 PM, Alex Bennée wrote: > That would solves the problem of converting the various backends > piecemeal - although obviously we should move to all backends having > "native" support ASAP. However by introducing expensive substitute > functions we will slow down the translations as each front end is > expanded to translate the target barrier ops. Obviously. We could in fact do that all up front if desired. It doesn't take long to look up the barrier instructions for each isa. > Should we make the emitting of the function call/TCGop conditional on > MTTCG being enabled? If we are running in round-robin mode there is no > need to issue any fence operations. Probably. But to keep the translators clean we should probably hide that within tcg_gen_fence(). r~
Hi Richard, Thank you for the helpful comments. On Wed, May 25, 2016 at 1:35 PM, Richard Henderson <rth@twiddle.net> wrote: > On 05/24/2016 10:18 AM, Pranith Kumar wrote: >> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h >> index 92be341..93ea42e 100644 >> --- a/tcg/i386/tcg-target.h >> +++ b/tcg/i386/tcg-target.h >> @@ -100,6 +100,7 @@ extern bool have_bmi1; >> #define TCG_TARGET_HAS_muls2_i32 1 >> #define TCG_TARGET_HAS_muluh_i32 0 >> #define TCG_TARGET_HAS_mulsh_i32 0 >> +#define TCG_TARGET_HAS_fence 1 > > > This has to be defined for all hosts. OK. I will add an entry in tcg.h with default 0 and override in individual architecture once it is implemented. >> @@ -347,6 +347,7 @@ static inline int >> tcg_target_const_match(tcg_target_long val, TCGType type, >> #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) >> #define OPC_TESTL (0x85) >> #define OPC_XCHG_ax_r32 (0x90) >> +#define OPC_MFENCE (0xAE | P_EXT) > > Why define OPC_MFENCE if you're not going to use it? Of course, it's not > exactly a complete and useful definition, so maybe just delete OPC_MFENCE. I want to use OPC_MFENCE instead of hard-coding the value in tcg_out_fence(), but as you said the definition is not complete(it currently generates only 0x0FAE). I am trying to figure out how to generate 0x0FAEF0 using the definition. > > Also, for 32-bit you need to check for sse2 before outputting this. See > also the existing cpuid checks in tcg_target_init and the fallback smp_mb > definition for pre-gcc-4.4. OK, I'll check the current code and do something similar. Thanks,
On Wed, May 25, 2016 at 3:25 PM, Alex Bennée <alex.bennee@linaro.org> wrote: > Should we make the emitting of the function call/TCGop conditional on > MTTCG being enabled? If we are running in round-robin mode there is no > need to issue any fence operations. > Also, we should check if SMP (more than one processor) is enabled, since fences are not necessary on UP (uniprocessor) systems.
On Wed, May 25, 2016 at 3:43 PM, Sergey Fedorov <serge.fdrv@gmail.com> wrote: > > I think it would better not to defer native support for the operation. > It should be relatively simple instruction. Otherwise we could wind up > deferring this indefinitely. > Agreed. I will go with the native generation for now. Thanks,
On 25/05/16 22:59, Pranith Kumar wrote: > On Wed, May 25, 2016 at 3:43 PM, Sergey Fedorov <serge.fdrv@gmail.com> wrote: >> I think it would better not to defer native support for the operation. >> It should be relatively simple instruction. Otherwise we could wind up >> deferring this indefinitely. >> > Agreed. I will go with the native generation for now. I mean we'd better implement native support for all the supported host architectures right away. Kind regards, Sergey
Pranith Kumar <bobby.prani@gmail.com> writes: > Hi Richard, > > Thank you for the helpful comments. > > On Wed, May 25, 2016 at 1:35 PM, Richard Henderson <rth@twiddle.net> wrote: >> On 05/24/2016 10:18 AM, Pranith Kumar wrote: >>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h >>> index 92be341..93ea42e 100644 >>> --- a/tcg/i386/tcg-target.h >>> +++ b/tcg/i386/tcg-target.h >>> @@ -100,6 +100,7 @@ extern bool have_bmi1; >>> #define TCG_TARGET_HAS_muls2_i32 1 >>> #define TCG_TARGET_HAS_muluh_i32 0 >>> #define TCG_TARGET_HAS_mulsh_i32 0 >>> +#define TCG_TARGET_HAS_fence 1 >> >> >> This has to be defined for all hosts. > > OK. I will add an entry in tcg.h with default 0 and override in > individual architecture once it is implemented. > >>> @@ -347,6 +347,7 @@ static inline int >>> tcg_target_const_match(tcg_target_long val, TCGType type, >>> #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) >>> #define OPC_TESTL (0x85) >>> #define OPC_XCHG_ax_r32 (0x90) >>> +#define OPC_MFENCE (0xAE | P_EXT) >> >> Why define OPC_MFENCE if you're not going to use it? Of course, it's not >> exactly a complete and useful definition, so maybe just delete OPC_MFENCE. > > I want to use OPC_MFENCE instead of hard-coding the value in > tcg_out_fence(), but as you said the definition is not complete(it > currently generates only 0x0FAE). I am trying to figure out how to > generate 0x0FAEF0 using the definition. I think you're going to have to just use tcg_out_fence(), as tcg_out_opc() does black magic with extra flag bits and assumes it is encoding rx registers (at least in 64 bit mode). However I would suggest a comment and maybe a breakdown of the different fence types you can emit. Will pre-P4 processors never need mfences? > >> >> Also, for 32-bit you need to check for sse2 before outputting this. See >> also the existing cpuid checks in tcg_target_init and the fallback smp_mb >> definition for pre-gcc-4.4. > > OK, I'll check the current code and do something similar. 
> > Thanks, -- Alex Bennée
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 92be341..93ea42e 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -100,6 +100,7 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_fence 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 238fa10..cf49272 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -347,6 +347,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_TESTL (0x85) #define OPC_XCHG_ax_r32 (0x90) +#define OPC_MFENCE (0xAE | P_EXT) #define OPC_GRP3_Ev (0xf7) #define OPC_GRP5 (0xff) @@ -686,6 +687,14 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } +static inline void tcg_out_fence(TCGContext *s) +{ + /* TODO: Figure out an appropriate place for the encoding */ + tcg_out8(s, 0x0F); + tcg_out8(s, 0xAE); + tcg_out8(s, 0xF0); +} + static inline void tcg_out_push(TCGContext *s, int reg) { tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 4696cf1..b772d90 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -42,7 +42,7 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END) # define IMPL64 TCG_OPF_64BIT #endif -DEF(fence, 0, 0, 0, 0) +DEF(fence, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tcg.c b/tcg/tcg.c index b5a22ba..461a33e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2444,6 +2444,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) dead_args, sync_args); break; case INDEX_op_fence: + tcg_out_fence(s); break; default: /* Sanity check that we've not introduced any unhandled opcodes. */
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com> --- tcg/i386/tcg-target.h | 1 + tcg/i386/tcg-target.inc.c | 9 +++++++++ tcg/tcg-opc.h | 2 +- tcg/tcg.c | 1 + 4 files changed, 12 insertions(+), 1 deletion(-)