Message ID | 56B2032902000078000CE03C@prv-mh.provo.novell.com (mailing list archive) |
---|---|
State | New, archived |
On 03/02/16 12:39, Jan Beulich wrote:
> From: Shuai Ruan <shuai.ruan@linux.intel.com>
>
> This patch uses alternative asm on the xsave side.
> As xsaves uses a modified optimization like xsaveopt, xsaves
> may not write the FPU portion of the save image either.
> So xsaves also needs some extra tweaks.
>
> Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
>
> Fix XSAVES opcode. Extend the other respective XSAVEOPT conditional to
> cover XSAVES as well. Re-wrap comment being adjusted.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
>      uint32_t hmask = mask >> 32;
>      uint32_t lmask = mask;
>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
> +#define XSAVE(pfx) \
> +    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
> +                     ".byte " pfx "0x0f,0xae,0x37\n", \
> +                     X86_FEATURE_XSAVEOPT, \
> +                     ".byte " pfx "0x0f,0xc7,0x27\n", \
> +                     X86_FEATURE_XSAVEC, \
> +                     ".byte " pfx "0x0f,0xc7,0x2f\n", \
> +                     X86_FEATURE_XSAVES, \

Given that the options are a little out of order and using raw bytes,
would you mind annotating the lines with the operations? E.g.

+    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */    \
+                     ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
+                     X86_FEATURE_XSAVEOPT, \
+                     ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */   \
+                     X86_FEATURE_XSAVEC, \
+                     ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */   \
+                     X86_FEATURE_XSAVES, \

IMO, this is somewhat clearer to read.

Otherwise,

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
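A note on the ordering remark above: the macro lists the baseline xsave encoding first and then the xsaveopt, xsavec and xsaves replacements. Assuming later alternatives take precedence when several feature flags are set (matching the priority of the if/else chain the patch removes), the effective selection is the one sketched below. The cpu_has_* predicates are the ones already used in xstate.c; the helper itself is purely illustrative and not part of the patch.

    /*
     * Illustrative only: the variant that ends up patched in by XSAVE(pfx),
     * assuming later alternatives override earlier ones when several
     * feature flags are set.  Encodings are for a (%rdi) memory operand.
     */
    static const char *xsave_variant_used(void)
    {
        if ( cpu_has_xsaves )
            return "xsaves";    /* 0f c7 /5 -> 0x0f,0xc7,0x2f */
        if ( cpu_has_xsavec )
            return "xsavec";    /* 0f c7 /4 -> 0x0f,0xc7,0x27 */
        if ( cpu_has_xsaveopt )
            return "xsaveopt";  /* 0f ae /6 -> 0x0f,0xae,0x37 */
        return "xsave";         /* 0f ae /4 -> 0x0f,0xae,0x27 */
    }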
>>> On 03.02.16 at 14:26, <andrew.cooper3@citrix.com> wrote:
> On 03/02/16 12:39, Jan Beulich wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
>>      uint32_t hmask = mask >> 32;
>>      uint32_t lmask = mask;
>>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
>> +#define XSAVE(pfx) \
>> +    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
>> +                     ".byte " pfx "0x0f,0xae,0x37\n", \
>> +                     X86_FEATURE_XSAVEOPT, \
>> +                     ".byte " pfx "0x0f,0xc7,0x27\n", \
>> +                     X86_FEATURE_XSAVEC, \
>> +                     ".byte " pfx "0x0f,0xc7,0x2f\n", \
>> +                     X86_FEATURE_XSAVES, \
>
> Given that the options are a little out of order and using raw bytes,
> would you mind annotating the lines with the operations? E.g.
>
> +    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */    \
> +                     ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
> +                     X86_FEATURE_XSAVEOPT, \
> +                     ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */   \
> +                     X86_FEATURE_XSAVEC, \
> +                     ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */   \
> +                     X86_FEATURE_XSAVES, \
>
> IMO, this is somewhat clearer to read.

Okay, since I had been considering this too, I've just done so.

> Otherwise,
>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks, Jan
On 03/02/16 13:34, Jan Beulich wrote:
>>>> On 03.02.16 at 14:26, <andrew.cooper3@citrix.com> wrote:
>> On 03/02/16 12:39, Jan Beulich wrote:
>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
>>>      uint32_t hmask = mask >> 32;
>>>      uint32_t lmask = mask;
>>>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
>>> +#define XSAVE(pfx) \
>>> +    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
>>> +                     ".byte " pfx "0x0f,0xae,0x37\n", \
>>> +                     X86_FEATURE_XSAVEOPT, \
>>> +                     ".byte " pfx "0x0f,0xc7,0x27\n", \
>>> +                     X86_FEATURE_XSAVEC, \
>>> +                     ".byte " pfx "0x0f,0xc7,0x2f\n", \
>>> +                     X86_FEATURE_XSAVES, \
>> Given that the options are a little out of order and using raw bytes,
>> would you mind annotating the lines with the operations? E.g.
>>
>> +    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */    \
>> +                     ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>> +                     X86_FEATURE_XSAVEOPT, \
>> +                     ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */   \
>> +                     X86_FEATURE_XSAVEC, \
>> +                     ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */   \
>> +                     X86_FEATURE_XSAVES, \
>>
>> IMO, this is somewhat clearer to read.
> Okay, since I had been considering this too, I've just done so.
>
>> Otherwise,
>>
>> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Thanks, Jan

On further thought, it would be nice to annotate the XRSTOR() side as well.

~Andrew
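For the XRSTOR() side mentioned here, the relevant mnemonics and their hand-assembled encodings are listed below for reference. This is background only: the thread does not show the XRSTOR() macro itself, so the exact bytes and operand register used in xstate.c would need to be checked against the source.

    /*
     * Reference encodings for the restore side, assuming the buffer
     * pointer is again in %rdi (ModRM mod=00, rm=111b):
     *
     *   .byte 0x0f,0xae,0x2f        xrstor  (%rdi)    -- 0F AE /5
     *   .byte 0x0f,0xc7,0x1f        xrstors (%rdi)    -- 0F C7 /3
     *
     * As on the save side, a leading 0x48 (REX.W) byte selects the
     * 64-bit forms.
     */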
On Wed, Feb 03, 2016 at 05:39:53AM -0700, Jan Beulich wrote:
> From: Shuai Ruan <shuai.ruan@linux.intel.com>
>
> This patch uses alternative asm on the xsave side.
> As xsaves uses a modified optimization like xsaveopt, xsaves
> may not write the FPU portion of the save image either.
> So xsaves also needs some extra tweaks.
>
> Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
>
> Fix XSAVES opcode. Extend the other respective XSAVEOPT conditional to
> cover XSAVES as well. Re-wrap comment being adjusted.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Thanks for pointing out the bug and fixing it.
I have tested the v2 patch, and it works well.
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
+#define XSAVE(pfx) \
+    alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
+                     ".byte " pfx "0x0f,0xae,0x37\n", \
+                     X86_FEATURE_XSAVEOPT, \
+                     ".byte " pfx "0x0f,0xc7,0x27\n", \
+                     X86_FEATURE_XSAVEC, \
+                     ".byte " pfx "0x0f,0xc7,0x2f\n", \
+                     X86_FEATURE_XSAVES, \
+                     "=m" (*ptr), \
+                     "a" (lmask), "d" (hmask), "D" (ptr))
 
     if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
     {
         typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
         typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
 
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaveopt || cpu_has_xsaves )
         {
             /*
-             * xsaveopt may not write the FPU portion even when the respective
-             * mask bit is set. For the check further down to work we hence
-             * need to put the save image back into the state that it was in
-             * right after the previous xsaveopt.
+             * XSAVEOPT/XSAVES may not write the FPU portion even when the
+             * respective mask bit is set. For the check further down to work
+             * we hence need to put the save image back into the state that
+             * it was in right after the previous XSAVEOPT.
              */
             if ( word_size > 0 &&
                  (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
@@ -279,14 +281,9 @@ void xsave(struct vcpu *v, uint64_t mask
                 ptr->fpu_sse.fip.sel = 0;
                 ptr->fpu_sse.fdp.sel = 0;
             }
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
         }
-        else
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+
+        XSAVE("0x48,");
 
         if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) ||
              /*
@@ -296,7 +293,7 @@ void xsave(struct vcpu *v, uint64_t mask
              (!(ptr->fpu_sse.fsw & 0x0080) &&
               boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
         {
-            if ( cpu_has_xsaveopt && word_size > 0 )
+            if ( (cpu_has_xsaveopt || cpu_has_xsaves) && word_size > 0 )
             {
                 ptr->fpu_sse.fip.sel = fcs;
                 ptr->fpu_sse.fdp.sel = fds;
@@ -317,24 +314,10 @@ void xsave(struct vcpu *v, uint64_t mask
     }
     else
     {
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
-            asm volatile ( ".byte 0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else
-            asm volatile ( ".byte 0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        XSAVE("");
         word_size = 4;
     }
+#undef XSAVE
 
     if ( word_size >= 0 )
         ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
 }
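The opcodes in the patch are hand-assembled, presumably so the code does not depend on assembler support for the newer mnemonics. As a sanity check of what the bytes mean, here is a small stand-alone user-space sketch (an illustration only, not Xen code; it assumes an x86-64 CPU and OS with XSAVE enabled) showing that the 0x48,0x0f,0xae,0x27 sequence produced by XSAVE("0x48,") when none of the feature flags are set is xsave64 (%rdi):

    /* Stand-alone illustration only -- not part of the patch. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* XSAVE area: 512-byte legacy region + 64-byte header; must be
     * 64-byte aligned. */
    static uint8_t area[1024] __attribute__((__aligned__(64)));

    int main(void)
    {
        uint32_t lmask = 0x3, hmask = 0;   /* request x87 + SSE state */
        uint64_t xstate_bv;

        /* Same byte sequence as the hypervisor's XSAVE("0x48,"),
         * i.e. xsave64 (%rdi). */
        asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
                       : "=m" (area)
                       : "a" (lmask), "d" (hmask), "D" (area)
                       : "memory" );

        /* XSTATE_BV is the first quadword of the XSAVE header (offset 512). */
        memcpy(&xstate_bv, area + 512, sizeof(xstate_bv));
        printf("XSTATE_BV = %#llx\n", (unsigned long long)xstate_bv);
        return 0;
    }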