x86emul: relax asm() constraints

Message ID	56C1CD5902000078000D2107@prv-mh.provo.novell.com (mailing list archive)
State	New, archived
Headers	show
Return-Path: <xen-devel-bounces@lists.xen.org>
Message-Id: <56C1CD5902000078000D2107@prv-mh.provo.novell.com>
Date: Mon, 15 Feb 2016 05:06:33 -0700
From: "Jan Beulich" <JBeulich@suse.com>
To: "xen-devel" <xen-devel@lists.xenproject.org>
Mime-Version: 1.0
Content-Type: multipart/mixed; boundary="=__Part192E8359.1__="
Cc: Andrew Cooper <andrew.cooper3@citrix.com>, Keir Fraser <keir@xen.org>
Subject: [Xen-devel] [PATCH] x86emul: relax asm() constraints
Precedence: list
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org

Message ID

56C1CD5902000078000D2107@prv-mh.provo.novell.com (mailing list archive)

State

New, archived

Headers

Message-Id: <56C1CD5902000078000D2107@prv-mh.provo.novell.com>
Date: Mon, 15 Feb 2016 05:06:33 -0700
From: "Jan Beulich" <JBeulich@suse.com>
To: "xen-devel" <xen-devel@lists.xenproject.org>
Mime-Version: 1.0
Content-Type: multipart/mixed; boundary="=__Part192E8359.1__="
Cc: Andrew Cooper <andrew.cooper3@citrix.com>, Keir Fraser <keir@xen.org>
Subject: [Xen-devel] [PATCH] x86emul: relax asm() constraints
Precedence: list
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org

Commit Message

Jan Beulich Feb. 15, 2016, 12:06 p.m. UTC

Let's give the compiler as much liberty in picking instruction operands
as possible. Also drop unnecessary size modifiers when the correct size
can already be derived from the asm() operands. Finally also drop an
"unsigned" from idiv_dbl()'s second parameter, allowing a cast to be
eliminated.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
x86emul: relax asm() constraints

Let's give the compiler as much liberty in picking instruction operands
as possible. Also drop unnecessary size modifiers when the correct size
can already be derived from the asm() operands. Finally also drop an
"unsigned" from idiv_dbl()'s second parameter, allowing a cast to be
eliminated.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -611,7 +611,7 @@ do {
  */
 static bool_t even_parity(uint8_t v)
 {
-    asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) );
+    asm ( "test %1,%1; setp %0" : "=qm" (v) : "q" (v) );
     return v;
 }
 
@@ -813,9 +813,9 @@ static int read_ulong(
  */
 static bool_t mul_dbl(unsigned long m[2])
 {
-    bool_t rc = 0;
-    asm ( "mul %1; seto %b2"
-          : "+a" (m[0]), "+d" (m[1]), "+q" (rc) );
+    bool_t rc;
+    asm ( "mul %1; seto %2"
+          : "+a" (m[0]), "+d" (m[1]), "=q" (rc) );
     return rc;
 }
 
@@ -826,9 +826,9 @@ static bool_t mul_dbl(unsigned long m[2]
  */
 static bool_t imul_dbl(unsigned long m[2])
 {
-    bool_t rc = 0;
+    bool_t rc;
     asm ( "imul %1; seto %b2"
-          : "+a" (m[0]), "+d" (m[1]), "+q" (rc) );
+          : "+a" (m[0]), "+d" (m[1]), "=q" (rc) );
     return rc;
 }
 
@@ -854,9 +854,9 @@ static bool_t div_dbl(unsigned long u[2]
  * NB. We don't use idiv directly as it's moderately hard to work out
  *     ahead of time whether it will #DE, which we cannot allow to happen.
  */
-static bool_t idiv_dbl(unsigned long u[2], unsigned long v)
+static bool_t idiv_dbl(unsigned long u[2], long v)
 {
-    bool_t negu = (long)u[1] < 0, negv = (long)v < 0;
+    bool_t negu = (long)u[1] < 0, negv = v < 0;
 
     /* u = abs(u) */
     if ( negu )
@@ -4542,9 +4542,10 @@ x86_emulate(
 
     case 0xbc: /* bsf or tzcnt */ {
         bool_t zf;
-        asm ( "bsf %2,%0; setz %b1"
+
+        asm ( "bsf %2,%0; setz %1"
               : "=r" (dst.val), "=q" (zf)
-              : "r" (src.val) );
+              : "rm" (src.val) );
         _regs.eflags &= ~EFLG_ZF;
         if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() )
         {
@@ -4567,9 +4568,10 @@ x86_emulate(
 
     case 0xbd: /* bsr or lzcnt */ {
         bool_t zf;
-        asm ( "bsr %2,%0; setz %b1"
+
+        asm ( "bsr %2,%0; setz %1"
               : "=r" (dst.val), "=q" (zf)
-              : "r" (src.val) );
+              : "rm" (src.val) );
         _regs.eflags &= ~EFLG_ZF;
         if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() )
         {
@@ -4698,7 +4700,7 @@ x86_emulate(
             break;
         case 4:
 #ifdef __x86_64__
-            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) );
+            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );
             break;
         case 8:
 #endif

Comments

Andrew Cooper Feb. 15, 2016, 1:39 p.m. UTC | #1

On 15/02/16 12:06, Jan Beulich wrote:
> Let's give the compiler as much liberty in picking instruction operands
> as possible. Also drop unnecessary size modifiers when the correct size
> can already be derived from the asm() operands. Finally also drop an
> "unsigned" from idiv_dbl()'s second parameter, allowing a cast to be
> eliminated.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -611,7 +611,7 @@ do {
>   */
>  static bool_t even_parity(uint8_t v)
>  {
> -    asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) );
> +    asm ( "test %1,%1; setp %0" : "=qm" (v) : "q" (v) );
>      return v;
>  }
>  
> @@ -813,9 +813,9 @@ static int read_ulong(
>   */
>  static bool_t mul_dbl(unsigned long m[2])
>  {
> -    bool_t rc = 0;
> -    asm ( "mul %1; seto %b2"
> -          : "+a" (m[0]), "+d" (m[1]), "+q" (rc) );
> +    bool_t rc;
> +    asm ( "mul %1; seto %2"
> +          : "+a" (m[0]), "+d" (m[1]), "=q" (rc) );
>      return rc;
>  }
>  
> @@ -826,9 +826,9 @@ static bool_t mul_dbl(unsigned long m[2]
>   */
>  static bool_t imul_dbl(unsigned long m[2])
>  {
> -    bool_t rc = 0;
> +    bool_t rc;
>      asm ( "imul %1; seto %b2"
> -          : "+a" (m[0]), "+d" (m[1]), "+q" (rc) );
> +          : "+a" (m[0]), "+d" (m[1]), "=q" (rc) );
>      return rc;
>  }
>  
> @@ -854,9 +854,9 @@ static bool_t div_dbl(unsigned long u[2]
>   * NB. We don't use idiv directly as it's moderately hard to work out
>   *     ahead of time whether it will #DE, which we cannot allow to happen.
>   */
> -static bool_t idiv_dbl(unsigned long u[2], unsigned long v)
> +static bool_t idiv_dbl(unsigned long u[2], long v)
>  {
> -    bool_t negu = (long)u[1] < 0, negv = (long)v < 0;
> +    bool_t negu = (long)u[1] < 0, negv = v < 0;
>  
>      /* u = abs(u) */
>      if ( negu )
> @@ -4542,9 +4542,10 @@ x86_emulate(
>  
>      case 0xbc: /* bsf or tzcnt */ {
>          bool_t zf;
> -        asm ( "bsf %2,%0; setz %b1"
> +
> +        asm ( "bsf %2,%0; setz %1"
>                : "=r" (dst.val), "=q" (zf)

This =q could become =qm, like the even_parity() change.

> -              : "r" (src.val) );
> +              : "rm" (src.val) );
>          _regs.eflags &= ~EFLG_ZF;
>          if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() )
>          {
> @@ -4567,9 +4568,10 @@ x86_emulate(
>  
>      case 0xbd: /* bsr or lzcnt */ {
>          bool_t zf;
> -        asm ( "bsr %2,%0; setz %b1"
> +
> +        asm ( "bsr %2,%0; setz %1"
>                : "=r" (dst.val), "=q" (zf)
> -              : "r" (src.val) );
> +              : "rm" (src.val) );
>          _regs.eflags &= ~EFLG_ZF;
>          if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() )
>          {
> @@ -4698,7 +4700,7 @@ x86_emulate(
>              break;
>          case 4:
>  #ifdef __x86_64__
> -            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) );
> +            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );

What is the purpose of both the explicit cast and the k operand modifier?

~Andrew

Jan Beulich Feb. 15, 2016, 2:24 p.m. UTC | #2

>>> On 15.02.16 at 14:39, <andrew.cooper3@citrix.com> wrote:
> On 15/02/16 12:06, Jan Beulich wrote:
>> @@ -4542,9 +4542,10 @@ x86_emulate(
>>  
>>      case 0xbc: /* bsf or tzcnt */ {
>>          bool_t zf;
>> -        asm ( "bsf %2,%0; setz %b1"
>> +
>> +        asm ( "bsf %2,%0; setz %1"
>>                : "=r" (dst.val), "=q" (zf)
> 
> This =q could become =qm, like the even_parity() change.

Ah, indeed. And there are a couple more.

>> @@ -4698,7 +4700,7 @@ x86_emulate(
>>              break;
>>          case 4:
>>  #ifdef __x86_64__
>> -            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) );
>> +            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );
> 
> What is the purpose of both the explicit cast and the k operand modifier?

Operand size gets (or at least may get) derived from the output
operand. While we could also constrain that one to 32 bits, it
seems better to have the whole dst.val written just in case. Of
the source operand (*dst.reg), on the other hand, we definitely only
need to load the low 32 bits (hence the cast, possibly saving a REX
prefix), and we also definitely need to force the bswap to have
32-bit operand size (hence the k modifier).

Jan

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -611,7 +611,7 @@  do {
  */
 static bool_t even_parity(uint8_t v)
 {
-    asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) );
+    asm ( "test %1,%1; setp %0" : "=qm" (v) : "q" (v) );
     return v;
 }
 
@@ -813,9 +813,9 @@  static int read_ulong(
  */
 static bool_t mul_dbl(unsigned long m[2])
 {
-    bool_t rc = 0;
-    asm ( "mul %1; seto %b2"
-          : "+a" (m[0]), "+d" (m[1]), "+q" (rc) );
+    bool_t rc;
+    asm ( "mul %1; seto %2"
+          : "+a" (m[0]), "+d" (m[1]), "=q" (rc) );
     return rc;
 }
 
@@ -826,9 +826,9 @@  static bool_t mul_dbl(unsigned long m[2]
  */
 static bool_t imul_dbl(unsigned long m[2])
 {
-    bool_t rc = 0;
+    bool_t rc;
     asm ( "imul %1; seto %b2"
-          : "+a" (m[0]), "+d" (m[1]), "+q" (rc) );
+          : "+a" (m[0]), "+d" (m[1]), "=q" (rc) );
     return rc;
 }
 
@@ -854,9 +854,9 @@  static bool_t div_dbl(unsigned long u[2]
  * NB. We don't use idiv directly as it's moderately hard to work out
  *     ahead of time whether it will #DE, which we cannot allow to happen.
  */
-static bool_t idiv_dbl(unsigned long u[2], unsigned long v)
+static bool_t idiv_dbl(unsigned long u[2], long v)
 {
-    bool_t negu = (long)u[1] < 0, negv = (long)v < 0;
+    bool_t negu = (long)u[1] < 0, negv = v < 0;
 
     /* u = abs(u) */
     if ( negu )
@@ -4542,9 +4542,10 @@  x86_emulate(
 
     case 0xbc: /* bsf or tzcnt */ {
         bool_t zf;
-        asm ( "bsf %2,%0; setz %b1"
+
+        asm ( "bsf %2,%0; setz %1"
               : "=r" (dst.val), "=q" (zf)
-              : "r" (src.val) );
+              : "rm" (src.val) );
         _regs.eflags &= ~EFLG_ZF;
         if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() )
         {
@@ -4567,9 +4568,10 @@  x86_emulate(
 
     case 0xbd: /* bsr or lzcnt */ {
         bool_t zf;
-        asm ( "bsr %2,%0; setz %b1"
+
+        asm ( "bsr %2,%0; setz %1"
               : "=r" (dst.val), "=q" (zf)
-              : "r" (src.val) );
+              : "rm" (src.val) );
         _regs.eflags &= ~EFLG_ZF;
         if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() )
         {
@@ -4698,7 +4700,7 @@  x86_emulate(
             break;
         case 4:
 #ifdef __x86_64__
-            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) );
+            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );
             break;
         case 8:
 #endif

x86emul: relax asm() constraints

Commit Message

Comments

Patch