diff mbox series

[v2,2/2] x86: detect CMOS aliasing on ports other than 0x70/0x71

Message ID 72a63cba-bfdb-ae3c-284b-8ba5b9d7f7a9@suse.com (mailing list archive)
State Superseded
Headers show
Series x86: RTC handling adjustments | expand

Commit Message

Jan Beulich July 15, 2020, 9:47 a.m. UTC
... in order to also intercept accesses through the alias ports.

Also stop intercepting accesses to the CMOS ports if we won't ourselves
use the CMOS RTC.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base.

Comments

Roger Pau Monné July 16, 2020, 2:31 p.m. UTC | #1
On Wed, Jul 15, 2020 at 11:47:56AM +0200, Jan Beulich wrote:
> ... in order to also intercept accesses through the alias ports.
> 
> Also stop intercepting accesses to the CMOS ports if we won't ourselves
> use the CMOS RTC.

I think you are missing the registration of the aliased ports in
rtc_init for a PVH hardware domain, hw_rtc_io will currently only get
called by accesses to 0x70-0x71.

> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Re-base.
> 
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -670,6 +670,80 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>      return ret;
>  }
>  
> +#ifndef COMPAT
> +#include <asm/mc146818rtc.h>
> +
> +unsigned int __read_mostly cmos_alias_mask;
> +
> +static int __init probe_cmos_alias(void)
> +{
> +    unsigned int i, offs;
> +
> +    if ( acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC )
> +        return 0;
> +
> +    for ( offs = 2; offs < 8; offs <<= 1 )
> +    {
> +        bool read = true;
> +
> +        for ( i = RTC_REG_D + 1; i < 0x80; ++i )
> +        {
> +            uint8_t normal, alt;
> +            unsigned long flags;
> +
> +            if ( i == acpi_gbl_FADT.century )
> +                continue;

I'm missing something, why do you avoid the century register for
comparison reasons?

> @@ -2009,37 +2009,33 @@ int __hwdom_init xen_in_range(unsigned l
>  static int __hwdom_init io_bitmap_cb(unsigned long s, unsigned long e,
>                                       void *ctx)
>  {
> -    struct domain *d = ctx;
> +    const struct domain *d = ctx;

Urg, it's kind of weird to constify d ...

>      unsigned int i;
>  
>      ASSERT(e <= INT_MAX);
>      for ( i = s; i <= e; i++ )
> -        __clear_bit(i, d->arch.hvm.io_bitmap);
> +        if ( admin_io_okay(i, 1, d) )
> +            __clear_bit(i, d->arch.hvm.io_bitmap);

... when you are modifying the bitmap here.

>  
>      return 0;
>  }
>  
>  void __hwdom_init setup_io_bitmap(struct domain *d)
>  {
> -    int rc;
> +    if ( !is_hvm_domain(d) )
> +        return;
>  
> -    if ( is_hvm_domain(d) )
> -    {
> -        bitmap_fill(d->arch.hvm.io_bitmap, 0x10000);
> -        rc = rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
> -                                    io_bitmap_cb, d);
> -        BUG_ON(rc);
> -        /*
> -         * NB: we need to trap accesses to 0xcf8 in order to intercept
> -         * 4 byte accesses, that need to be handled by Xen in order to
> -         * keep consistency.
> -         * Access to 1 byte RTC ports also needs to be trapped in order
> -         * to keep consistency with PV.
> -         */
> -        __set_bit(0xcf8, d->arch.hvm.io_bitmap);
> -        __set_bit(RTC_PORT(0), d->arch.hvm.io_bitmap);
> -        __set_bit(RTC_PORT(1), d->arch.hvm.io_bitmap);
> -    }
> +    bitmap_fill(d->arch.hvm.io_bitmap, 0x10000);
> +    if ( rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
> +                                io_bitmap_cb, d) )
> +        BUG();

You can directly use BUG_ON, no need for the if. IIRC it's safe to
call admin_io_okay (and thus ioports_access_permitted) when already
holding the rangeset lock, as both are read-lockers and can safely
recurse.

> +
> +    /*
> +     * We need to trap 4-byte accesses to 0xcf8 (see admin_io_okay(),
> +     * guest_io_read(), and guest_io_write()), which isn't covered by
> +     * the admin_io_okay() check in io_bitmap_cb().
> +     */
> +    __set_bit(0xcf8, d->arch.hvm.io_bitmap);
>  }
>  
>  /*
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -1092,7 +1092,10 @@ static unsigned long get_cmos_time(void)
>          if ( seconds < 60 )
>          {
>              if ( rtc.sec != seconds )
> +            {
>                  cmos_rtc_probe = false;
> +                acpi_gbl_FADT.boot_flags &= ~ACPI_FADT_NO_CMOS_RTC;

Do you need to set this flag also when using the EFI runtime services
in order to get the time in get_cmos_time? In that case the RTC is not
used, and hence could be handed to the hardware domain?

> +            }
>              break;
>          }
>  
> @@ -1114,7 +1117,7 @@ unsigned int rtc_guest_read(unsigned int
>      unsigned long flags;
>      unsigned int data = ~0;
>  
> -    switch ( port )
> +    switch ( port & ~cmos_alias_mask )
>      {
>      case RTC_PORT(0):
>          /*
> @@ -1126,11 +1129,12 @@ unsigned int rtc_guest_read(unsigned int
>          break;
>  
>      case RTC_PORT(1):
> -        if ( !ioports_access_permitted(currd, RTC_PORT(0), RTC_PORT(1)) )
> +        if ( !ioports_access_permitted(currd, port - 1, port) )
>              break;
>          spin_lock_irqsave(&rtc_lock, flags);
> -        outb(currd->arch.cmos_idx & 0x7f, RTC_PORT(0));
> -        data = inb(RTC_PORT(1));
> +        outb(currd->arch.cmos_idx & (0xff >> (port == RTC_PORT(1))),

Why do you only mask this for accesses to the non aliased ports? If
the RTC is aliased you also want to mask the aliased accesses in the
same way?

Thanks, Roger.
Jan Beulich July 17, 2020, 10 a.m. UTC | #2
On 16.07.2020 16:31, Roger Pau Monné wrote:
> On Wed, Jul 15, 2020 at 11:47:56AM +0200, Jan Beulich wrote:
>> ... in order to also intercept accesses through the alias ports.
>>
>> Also stop intercepting accesses to the CMOS ports if we won't ourselves
>> use the CMOS RTC.
> 
> I think you are missing the registration of the aliased ports in
> rtc_init for a PVH hardware domain, hw_rtc_io will currently only get
> called by accesses to 0x70-0x71.

Oh, right - a re-basing oversight. Thanks for noticing. (It's not
just the registration that's missing, but also the avoiding of it
in case ACPI_FADT_NO_CMOS_RTC is set.)

>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v2: Re-base.
>>
>> --- a/xen/arch/x86/physdev.c
>> +++ b/xen/arch/x86/physdev.c
>> @@ -670,6 +670,80 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>>      return ret;
>>  }
>>  
>> +#ifndef COMPAT
>> +#include <asm/mc146818rtc.h>
>> +
>> +unsigned int __read_mostly cmos_alias_mask;
>> +
>> +static int __init probe_cmos_alias(void)
>> +{
>> +    unsigned int i, offs;
>> +
>> +    if ( acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC )
>> +        return 0;
>> +
>> +    for ( offs = 2; offs < 8; offs <<= 1 )
>> +    {
>> +        bool read = true;
>> +
>> +        for ( i = RTC_REG_D + 1; i < 0x80; ++i )
>> +        {
>> +            uint8_t normal, alt;
>> +            unsigned long flags;
>> +
>> +            if ( i == acpi_gbl_FADT.century )
>> +                continue;
> 
> I'm missing something, why do you avoid the century register for
> comparison reasons?

Just like the other RTC registers - their contents may change behind
our backs, here e.g. over New Year between two centuries.

>> @@ -2009,37 +2009,33 @@ int __hwdom_init xen_in_range(unsigned l
>>  static int __hwdom_init io_bitmap_cb(unsigned long s, unsigned long e,
>>                                       void *ctx)
>>  {
>> -    struct domain *d = ctx;
>> +    const struct domain *d = ctx;
> 
> Urg, it's kind of weird to constify d ...
> 
>>      unsigned int i;
>>  
>>      ASSERT(e <= INT_MAX);
>>      for ( i = s; i <= e; i++ )
>> -        __clear_bit(i, d->arch.hvm.io_bitmap);
>> +        if ( admin_io_okay(i, 1, d) )
>> +            __clear_bit(i, d->arch.hvm.io_bitmap);
> 
> ... when you are modifying the bitmap here.

Well - I'm not modifying what d points to. In principle these I/O
bitmaps are shared; it's just Dom0 which gets a separate one. So
modifying the bitmap really is unrelated to modifying struct domain.

>>  void __hwdom_init setup_io_bitmap(struct domain *d)
>>  {
>> -    int rc;
>> +    if ( !is_hvm_domain(d) )
>> +        return;
>>  
>> -    if ( is_hvm_domain(d) )
>> -    {
>> -        bitmap_fill(d->arch.hvm.io_bitmap, 0x10000);
>> -        rc = rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
>> -                                    io_bitmap_cb, d);
>> -        BUG_ON(rc);
>> -        /*
>> -         * NB: we need to trap accesses to 0xcf8 in order to intercept
>> -         * 4 byte accesses, that need to be handled by Xen in order to
>> -         * keep consistency.
>> -         * Access to 1 byte RTC ports also needs to be trapped in order
>> -         * to keep consistency with PV.
>> -         */
>> -        __set_bit(0xcf8, d->arch.hvm.io_bitmap);
>> -        __set_bit(RTC_PORT(0), d->arch.hvm.io_bitmap);
>> -        __set_bit(RTC_PORT(1), d->arch.hvm.io_bitmap);
>> -    }
>> +    bitmap_fill(d->arch.hvm.io_bitmap, 0x10000);
>> +    if ( rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
>> +                                io_bitmap_cb, d) )
>> +        BUG();
> 
> You can directly use BUG_ON, no need for the if.

Long ago we agreed to avoid BUG_ON() with expressions that have
required (side) effects. I.e. just like for ASSERT(), where the
expression wouldn't get evaluated at all when NDEBUG is defined.

> IIRC it's safe to
> call admin_io_okay (and thus ioports_access_permitted) when already
> holding the rangeset lock, as both are read-lockers and can safely
> recurse.

I'm afraid I don't see the connection of this remark to the
construct in question.

>> --- a/xen/arch/x86/time.c
>> +++ b/xen/arch/x86/time.c
>> @@ -1092,7 +1092,10 @@ static unsigned long get_cmos_time(void)
>>          if ( seconds < 60 )
>>          {
>>              if ( rtc.sec != seconds )
>> +            {
>>                  cmos_rtc_probe = false;
>> +                acpi_gbl_FADT.boot_flags &= ~ACPI_FADT_NO_CMOS_RTC;
> 
> Do you need to set this flag also when using the EFI runtime services
> in order to get the time in get_cmos_time? In that case the RTC is not
> used, and hence could be handed to the hardware domain?

Whether the EFI runtime services use the RTC is unknown. There are
specific precautions towards this in the UEFI spec, iirc.

>> @@ -1114,7 +1117,7 @@ unsigned int rtc_guest_read(unsigned int
>>      unsigned long flags;
>>      unsigned int data = ~0;
>>  
>> -    switch ( port )
>> +    switch ( port & ~cmos_alias_mask )
>>      {
>>      case RTC_PORT(0):
>>          /*
>> @@ -1126,11 +1129,12 @@ unsigned int rtc_guest_read(unsigned int
>>          break;
>>  
>>      case RTC_PORT(1):
>> -        if ( !ioports_access_permitted(currd, RTC_PORT(0), RTC_PORT(1)) )
>> +        if ( !ioports_access_permitted(currd, port - 1, port) )
>>              break;
>>          spin_lock_irqsave(&rtc_lock, flags);
>> -        outb(currd->arch.cmos_idx & 0x7f, RTC_PORT(0));
>> -        data = inb(RTC_PORT(1));
>> +        outb(currd->arch.cmos_idx & (0xff >> (port == RTC_PORT(1))),
> 
> Why do you only mask this for accesses to the non aliased ports? If
> the RTC is aliased you also want to mask the aliased accesses in the
> same way?

Bit 7 in port 70 has a different meaning (NMI mask); you can access
RTC/CMOS bytes 0-127 only this way. There are chipsets which provide
256-byte CMOS, where the high half can be accessed via the aliases.

However, seeing this comment of yours I noticed that there's still a
related bug here: When the guest reads/writes the index port, I _also_
need to mask off the high bit when it's the non-aliased port that gets
accessed. Otherwise Dom0 writing port 74 but then reading port 71
could lead to bit 7 getting set in port 70.

Jan
diff mbox series

Patch

--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -670,6 +670,80 @@  ret_t do_physdev_op(int cmd, XEN_GUEST_H
     return ret;
 }
 
+#ifndef COMPAT
+#include <asm/mc146818rtc.h>
+
+unsigned int __read_mostly cmos_alias_mask;
+
+static int __init probe_cmos_alias(void)
+{
+    unsigned int i, offs;
+
+    if ( acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC )
+        return 0;
+
+    for ( offs = 2; offs < 8; offs <<= 1 )
+    {
+        bool read = true;
+
+        for ( i = RTC_REG_D + 1; i < 0x80; ++i )
+        {
+            uint8_t normal, alt;
+            unsigned long flags;
+
+            if ( i == acpi_gbl_FADT.century )
+                continue;
+
+            spin_lock_irqsave(&rtc_lock, flags);
+
+            normal = CMOS_READ(i);
+            if ( inb(RTC_PORT(offs)) != i )
+                read = false;
+
+            alt = inb(RTC_PORT(offs + 1));
+
+            spin_unlock_irqrestore(&rtc_lock, flags);
+
+            if ( normal != alt )
+                break;
+
+            process_pending_softirqs();
+        }
+        if ( i == 0x80 )
+        {
+            cmos_alias_mask |= offs;
+            printk(XENLOG_INFO "CMOS aliased at %02x, index %s\n",
+                   RTC_PORT(offs), read ? "r/w" : "w/o");
+        }
+    }
+
+    return 0;
+}
+__initcall(probe_cmos_alias);
+
+/* Has the administrator granted sufficient permission for this I/O access? */
+bool admin_io_okay(unsigned int port, unsigned int bytes,
+                   const struct domain *d)
+{
+    /*
+     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
+     * We never permit direct access to that register.
+     */
+    if ( (port == 0xcf8) && (bytes == 4) )
+        return false;
+
+    /*
+     * We also never permit direct access to the RTC/CMOS registers
+     * if we may be accessing the RTC ones ourselves.
+     */
+    if ( !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) &&
+         ((port & ~(cmos_alias_mask | 1)) == RTC_PORT(0)) )
+        return false;
+
+    return ioports_access_permitted(d, port, port + bytes - 1);
+}
+#endif /* COMPAT */
+
 /*
  * Local variables:
  * mode: C
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -198,24 +198,6 @@  static bool guest_io_okay(unsigned int p
     return false;
 }
 
-/* Has the administrator granted sufficient permission for this I/O access? */
-static bool admin_io_okay(unsigned int port, unsigned int bytes,
-                          const struct domain *d)
-{
-    /*
-     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
-     * We never permit direct access to that register.
-     */
-    if ( (port == 0xcf8) && (bytes == 4) )
-        return false;
-
-    /* We also never permit direct access to the RTC/CMOS registers. */
-    if ( ((port & ~1) == RTC_PORT(0)) )
-        return false;
-
-    return ioports_access_permitted(d, port, port + bytes - 1);
-}
-
 static bool pci_cfg_ok(struct domain *currd, unsigned int start,
                        unsigned int size, uint32_t *write)
 {
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -48,7 +48,7 @@ 
 #include <xen/cpu.h>
 #include <asm/nmi.h>
 #include <asm/alternative.h>
-#include <asm/mc146818rtc.h>
+#include <asm/iocap.h>
 #include <asm/cpuid.h>
 #include <asm/spec_ctrl.h>
 #include <asm/guest.h>
@@ -2009,37 +2009,33 @@  int __hwdom_init xen_in_range(unsigned l
 static int __hwdom_init io_bitmap_cb(unsigned long s, unsigned long e,
                                      void *ctx)
 {
-    struct domain *d = ctx;
+    const struct domain *d = ctx;
     unsigned int i;
 
     ASSERT(e <= INT_MAX);
     for ( i = s; i <= e; i++ )
-        __clear_bit(i, d->arch.hvm.io_bitmap);
+        if ( admin_io_okay(i, 1, d) )
+            __clear_bit(i, d->arch.hvm.io_bitmap);
 
     return 0;
 }
 
 void __hwdom_init setup_io_bitmap(struct domain *d)
 {
-    int rc;
+    if ( !is_hvm_domain(d) )
+        return;
 
-    if ( is_hvm_domain(d) )
-    {
-        bitmap_fill(d->arch.hvm.io_bitmap, 0x10000);
-        rc = rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
-                                    io_bitmap_cb, d);
-        BUG_ON(rc);
-        /*
-         * NB: we need to trap accesses to 0xcf8 in order to intercept
-         * 4 byte accesses, that need to be handled by Xen in order to
-         * keep consistency.
-         * Access to 1 byte RTC ports also needs to be trapped in order
-         * to keep consistency with PV.
-         */
-        __set_bit(0xcf8, d->arch.hvm.io_bitmap);
-        __set_bit(RTC_PORT(0), d->arch.hvm.io_bitmap);
-        __set_bit(RTC_PORT(1), d->arch.hvm.io_bitmap);
-    }
+    bitmap_fill(d->arch.hvm.io_bitmap, 0x10000);
+    if ( rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
+                                io_bitmap_cb, d) )
+        BUG();
+
+    /*
+     * We need to trap 4-byte accesses to 0xcf8 (see admin_io_okay(),
+     * guest_io_read(), and guest_io_write()), which isn't covered by
+     * the admin_io_okay() check in io_bitmap_cb().
+     */
+    __set_bit(0xcf8, d->arch.hvm.io_bitmap);
 }
 
 /*
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1092,7 +1092,10 @@  static unsigned long get_cmos_time(void)
         if ( seconds < 60 )
         {
             if ( rtc.sec != seconds )
+            {
                 cmos_rtc_probe = false;
+                acpi_gbl_FADT.boot_flags &= ~ACPI_FADT_NO_CMOS_RTC;
+            }
             break;
         }
 
@@ -1114,7 +1117,7 @@  unsigned int rtc_guest_read(unsigned int
     unsigned long flags;
     unsigned int data = ~0;
 
-    switch ( port )
+    switch ( port & ~cmos_alias_mask )
     {
     case RTC_PORT(0):
         /*
@@ -1126,11 +1129,12 @@  unsigned int rtc_guest_read(unsigned int
         break;
 
     case RTC_PORT(1):
-        if ( !ioports_access_permitted(currd, RTC_PORT(0), RTC_PORT(1)) )
+        if ( !ioports_access_permitted(currd, port - 1, port) )
             break;
         spin_lock_irqsave(&rtc_lock, flags);
-        outb(currd->arch.cmos_idx & 0x7f, RTC_PORT(0));
-        data = inb(RTC_PORT(1));
+        outb(currd->arch.cmos_idx & (0xff >> (port == RTC_PORT(1))),
+             port - 1);
+        data = inb(port);
         spin_unlock_irqrestore(&rtc_lock, flags);
         break;
 
@@ -1146,8 +1150,10 @@  void rtc_guest_write(unsigned int port,
     struct domain *currd = current->domain;
     unsigned long flags;
 
-    switch ( port )
+    switch ( port & ~cmos_alias_mask )
     {
+        unsigned int idx;
+
     case RTC_PORT(0):
         /*
          * All PV domains (and PVH dom0) are allowed to write to the latched
@@ -1158,15 +1164,17 @@  void rtc_guest_write(unsigned int port,
         break;
 
     case RTC_PORT(1):
-        if ( !ioports_access_permitted(currd, RTC_PORT(0), RTC_PORT(1)) )
+        if ( !ioports_access_permitted(currd, port - 1, port) )
             break;
 
+        idx = currd->arch.cmos_idx & (0xff >> (port == RTC_PORT(1)));
+
         if ( pv_rtc_handler )
-            pv_rtc_handler(currd->arch.cmos_idx & 0x7f, data);
+            pv_rtc_handler(idx, data);
 
         spin_lock_irqsave(&rtc_lock, flags);
-        outb(currd->arch.cmos_idx & 0x7f, RTC_PORT(0));
-        outb(data, RTC_PORT(1));
+        outb(idx, port - 1);
+        outb(data, port);
         spin_unlock_irqrestore(&rtc_lock, flags);
         break;
 
--- a/xen/include/asm-x86/iocap.h
+++ b/xen/include/asm-x86/iocap.h
@@ -18,4 +18,7 @@ 
     (!rangeset_is_empty((d)->iomem_caps) ||             \
      !rangeset_is_empty((d)->arch.ioport_caps))
 
+bool admin_io_okay(unsigned int port, unsigned int bytes,
+                   const struct domain *d);
+
 #endif /* __X86_IOCAP_H__ */
--- a/xen/include/asm-x86/mc146818rtc.h
+++ b/xen/include/asm-x86/mc146818rtc.h
@@ -9,6 +9,8 @@ 
 
 extern spinlock_t rtc_lock;             /* serialize CMOS RAM access */
 
+extern unsigned int cmos_alias_mask;
+
 /**********************************************************************
  * register summary
  **********************************************************************/