diff mbox series

[v2,3/3] x86: detect PIT aliasing on ports other than 0x4[0-3]

Message ID 039b9ceb-4862-4e26-a344-e47fc04bd979@suse.com (mailing list archive)
State New, archived
Headers show
Series x86: Dom0 I/O port access permissions | expand

Commit Message

Jan Beulich Dec. 18, 2023, 2:48 p.m. UTC
... in order to also deny Dom0 access through the alias ports. Without
this it is only giving the impression of denying access to PIT. Unlike
for CMOS/RTC, do detection pretty early, to avoid disturbing normal
operation later on (even if typically we won't use much of the PIT).

Like for CMOS/RTC a fundamental assumption of the probing is that reads
from the probed alias port won't have side effects (beyond such that PIT
reads have anyway) in case it does not alias the PIT's.

As to the port 0x61 accesses: Unlike other accesses we do, this masks
off the top four bits (in addition to the bottom two ones), following
Intel chipset documentation saying that these (read-only) bits should
only be written with zero.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
If Xen was running on top of another instance of itself (in HVM mode,
not PVH, i.e. not as a shim), prior to 14f42af3f52d ('x86/vPIT: account
for "counter stopped" time') I'm afraid our vPIT logic would not have
allowed the "Try to further make sure ..." check to pass in the Xen
running on top: We don't respect the gate bit being clear when handling
counter reads. (There are more unhandled [and unmentioned as being so]
aspects of PIT behavior though, yet it's unclear in how far addressing
at least some of them would be useful.)
---
v2: Use new command line option. Re-base over changes to earlier
    patches. Use ISOLATE_LSB().

Comments

Jason Andryuk May 10, 2024, 5:40 p.m. UTC | #1
On 2023-12-18 09:48, Jan Beulich wrote:
> ... in order to also deny Dom0 access through the alias ports. Without
> this it is only giving the impression of denying access to PIT. Unlike
> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
> operation later on (even if typically we won't use much of the PIT).
> 
> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> from the probed alias port won't have side effects (beyond such that PIT
> reads have anyway) in case it does not alias the PIT's.
> 
> At to the port 0x61 accesses: Unlike other accesses we do, this masks

s/At/As/

> off the top four bits (in addition to the bottom two ones), following
> Intel chipset documentation saying that these (read-only) bits should
> only be written with zero.

This might be more useful in a comment next to `& 0x0c`.

> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> If Xen was running on top of another instance of itself (in HVM mode,
> not PVH, i.e. not as a shim), prior to 14f42af3f52d ('x86/vPIT: account
> for "counter stopped" time') I'm afraid our vPIT logic would not have
> allowed the "Try to further make sure ..." check to pass in the Xen
> running on top: We don't respect the gate bit being clear when handling
> counter reads. (There are more unhandled [and unmentioned as being so]
> aspects of PIT behavior though, yet it's unclear in how far addressing
> at least some of them would be useful.)
> ---
> v2: Use new command line option. Re-base over changes to earlier
>      patches. Use ISOLATE_LSB().
> 
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -495,7 +495,11 @@ int __init dom0_setup_permissions(struct
>       rc |= ioports_deny_access(d, 0x4D0, 0x4D1);
>   
>       /* Interval Timer (PIT). */
> -    rc |= ioports_deny_access(d, 0x40, 0x43);
> +    for ( offs = 0, i = ISOLATE_LSB(pit_alias_mask) ?: 4;
> +          offs <= pit_alias_mask; offs += i )
> +        if ( !(offs & ~pit_alias_mask) )
> +            rc |= ioports_deny_access(d, 0x40 + offs, 0x43 + offs);
> +
>       /* PIT Channel 2 / PC Speaker Control. */
>       rc |= ioports_deny_access(d, 0x61, 0x61);
>   
> --- a/xen/arch/x86/include/asm/setup.h
> +++ b/xen/arch/x86/include/asm/setup.h
> @@ -47,6 +47,7 @@ extern unsigned long highmem_start;
>   #endif
>   
>   extern unsigned int i8259A_alias_mask;
> +extern unsigned int pit_alias_mask;
>   
>   extern int8_t opt_smt;
>   extern int8_t opt_probe_port_aliases;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -425,6 +425,72 @@ static struct platform_timesource __init
>       .resume = resume_pit,
>   };
>   
> +unsigned int __initdata pit_alias_mask;
> +
> +static void __init probe_pit_alias(void)
> +{
> +    unsigned int mask = 0x1c;
> +    uint8_t val = 0;
> +
> +    if ( !opt_probe_port_aliases )
> +        return;
> +
> +    /*
> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
> +     * count is loaded independent of counting being / becoming enabled.  Thus
> +     * we have a 16-bit value fully under our control, to write and then check
> +     * whether we can also read it back unaltered.
> +     */
> +
> +    /* Turn off speaker output and disable channel 2 counting. */
> +    outb(inb(0x61) & 0x0c, 0x61);
> +
> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */

Channel 2, Lobyte/Hibyte, 0b000 Mode 0, (Binary)

#define PIT_MODE_CH2 (2 << 6)
#define PIT_MODE0_16BIT ((3 << 4) | (0 << 1))

outb(PIT_MODE_CH2 | PIT_MODE0_16BIT, PIT_MODE);

> +
> +    do {
> +        uint8_t val2;
> +        unsigned int offs;
> +
> +        outb(val, PIT_CH2);
> +        outb(val ^ 0xff, PIT_CH2);
> +
> +        /* Wait for the Null Count bit to clear. */
> +        do {
> +            /* Latch status. */
> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);

Read-back, Latch status,  read back timer channel 2

> +
> +            /* Try to make sure we're actually having a PIT here. */
> +            val2 = inb(PIT_CH2);
> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )

if ( (val2 & PIT_RB_MASK) != PIT_MODE0_16BIT )

I think particularly a define for PIT_MODE0_16BIT would be helpful to 
show what is expected to be the same.

> +                return;
> +        } while ( val2 & (1 << 6) );

I think Roger might have mentioned on an earlier version - would it make 
sense to have a counter to prevent looping forever?

Also, FYI, I tested the series.  My test machine didn't show any aliasing.

Thanks,
Jason

> +
> +        /*
> +         * Try to further make sure we're actually having a PIT here.
> +         *
> +         * NB: Deliberately |, not ||, as we always want both reads.
> +         */
> +        val2 = inb(PIT_CH2);
> +        if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) )
> +            return;
> +
> +        for ( offs = ISOLATE_LSB(mask); offs <= mask; offs <<= 1 )
> +        {
> +            if ( !(mask & offs) )
> +                continue;
> +            val2 = inb(PIT_CH2 + offs);
> +            if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) )
> +                mask &= ~offs;
> +        }
> +    } while ( mask && (val += 0x0b) );  /* Arbitrary uneven number. */
> +
> +    if ( mask )
> +    {
> +        dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask);
> +        pit_alias_mask = mask;
> +    }
> +}
> +
>   /************************************************************
>    * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
>    */
> @@ -2414,6 +2480,8 @@ void __init early_time_init(void)
>       }
>   
>       preinit_pit();
> +    probe_pit_alias();
> +
>       tmp = init_platform_timer();
>       plt_tsc.frequency = tmp;
>   
> 
> 
>
Jan Beulich May 14, 2024, 7:43 a.m. UTC | #2
On 10.05.2024 19:40, Jason Andryuk wrote:
> On 2023-12-18 09:48, Jan Beulich wrote:
>> --- a/xen/arch/x86/time.c
>> +++ b/xen/arch/x86/time.c
>> @@ -425,6 +425,72 @@ static struct platform_timesource __init
>>       .resume = resume_pit,
>>   };
>>   
>> +unsigned int __initdata pit_alias_mask;
>> +
>> +static void __init probe_pit_alias(void)
>> +{
>> +    unsigned int mask = 0x1c;
>> +    uint8_t val = 0;
>> +
>> +    if ( !opt_probe_port_aliases )
>> +        return;
>> +
>> +    /*
>> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
>> +     * count is loaded independent of counting being / becoming enabled.  Thus
>> +     * we have a 16-bit value fully under our control, to write and then check
>> +     * whether we can also read it back unaltered.
>> +     */
>> +
>> +    /* Turn off speaker output and disable channel 2 counting. */
>> +    outb(inb(0x61) & 0x0c, 0x61);
>> +
>> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
> 
> Channel 2, Lobyte/Hibyte, 0b000 Mode 0, (Binary)
> 
> #define PIT_MODE_CH2 (2 << 6)
> #define PIT_MODE0_16BIT ((3 << 4) | (0 << 1))
> 
> outb(PIT_MODE_CH2 | PIT_MODE0_16BIT, PIT_MODE);

Hmm. I can certainly see the value of introducing such #define-s, but then
while doing so one ought to also adjust other code using constants as done
here (for consistency).

>> +
>> +    do {
>> +        uint8_t val2;
>> +        unsigned int offs;
>> +
>> +        outb(val, PIT_CH2);
>> +        outb(val ^ 0xff, PIT_CH2);
>> +
>> +        /* Wait for the Null Count bit to clear. */
>> +        do {
>> +            /* Latch status. */
>> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
> 
> Read-back, Latch status,  read back timer channel 2

Was this meant as a request to extend the comment? If so, not quite,
as the line doesn't include any read-back. If not, I'm in trouble seeing
what you mean to tell me here (somewhat similar also for the first line
of your earlier comment still visible in context above).

>> +
>> +            /* Try to make sure we're actually having a PIT here. */
>> +            val2 = inb(PIT_CH2);
>> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
> 
> if ( (val2 & PIT_RB_MASK) != PIT_MODE0_16BIT )
> 
> I think particularly a define for PIT_MODE0_16BIT would be helpful to 
> show what is expected to be the same.
> 
>> +                return;
>> +        } while ( val2 & (1 << 6) );
> 
> I think Roger might have mentioned on an earlier version - would it make 
> sense to have a counter to prevent looping forever?

Well, as before: The issue with bounding such loops is that the bound is
going to be entirely arbitrary (and hence easily too large / too small).

> Also, FYI, I tested the series.  My test machine didn't show any aliasing.

That likely was an AMD one then? It's only Intel chipsets I've seen aliasing
on so far, but there it's (almost) all of them (with newer data sheets even
stating that behavior). We could, beyond shim, make the option default in
patch 1 be "false" for systems with AMD CPUs (on the assumption that those
wouldn't have Intel chipsets).

Jan
Jason Andryuk May 14, 2024, 7:30 p.m. UTC | #3
On 2024-05-14 03:43, Jan Beulich wrote:
> On 10.05.2024 19:40, Jason Andryuk wrote:
>> On 2023-12-18 09:48, Jan Beulich wrote:
>>> --- a/xen/arch/x86/time.c
>>> +++ b/xen/arch/x86/time.c
>>> @@ -425,6 +425,72 @@ static struct platform_timesource __init
>>>        .resume = resume_pit,
>>>    };
>>>    
>>> +unsigned int __initdata pit_alias_mask;
>>> +
>>> +static void __init probe_pit_alias(void)
>>> +{
>>> +    unsigned int mask = 0x1c;
>>> +    uint8_t val = 0;
>>> +
>>> +    if ( !opt_probe_port_aliases )
>>> +        return;
>>> +
>>> +    /*
>>> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
>>> +     * count is loaded independent of counting being / becoming enabled.  Thus
>>> +     * we have a 16-bit value fully under our control, to write and then check
>>> +     * whether we can also read it back unaltered.
>>> +     */
>>> +
>>> +    /* Turn off speaker output and disable channel 2 counting. */
>>> +    outb(inb(0x61) & 0x0c, 0x61);
>>> +
>>> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
>>
>> Channel 2, Lobyte/Hibyte, 0b000 Mode 0, (Binary)
>>
>> #define PIT_MODE_CH2 (2 << 6)
>> #define PIT_MODE0_16BIT ((3 << 4) | (0 << 1))
>>
>> outb(PIT_MODE_CH2 | PIT_MODE0_16BIT, PIT_MODE);
> 
> Hmm. I can certainly see the value of introducing such #define-s, but then
> while doing so one ought to also adjust other code using constants as done
> here (for consistency).

I had to look up all these bit values, so I think it's nicer with 
#define-s.  Particularly, using PIT_MODE0_16BIT for the programming and 
checking shows the relationship.  I wasn't looking to make more work for 
you.  This function is self-contained, so just using them here for the 
time being seems reasonable.

>>> +
>>> +    do {
>>> +        uint8_t val2;
>>> +        unsigned int offs;
>>> +
>>> +        outb(val, PIT_CH2);
>>> +        outb(val ^ 0xff, PIT_CH2);
>>> +
>>> +        /* Wait for the Null Count bit to clear. */
>>> +        do {
>>> +            /* Latch status. */
>>> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
>>
>> Read-back, Latch status,  read back timer channel 2
> 
> Was this meant as a request to extend the comment? If so, not quite,
> as the line doesn't include any read-back. If not, I'm in trouble seeing
> what you mean to tell me here (somewhat similar also for the first line
> of your earlier comment still visible in context above).

Sorry, these were my notes as I was interpreting the bits.  I should 
have removed them from the email before sending as they aren't 
actionable comments.  Read back was in reference to writing (3 << 6) to 
the mode - not the action of reading back the value.

>>> +
>>> +            /* Try to make sure we're actually having a PIT here. */
>>> +            val2 = inb(PIT_CH2);
>>> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
>>
>> if ( (val2 & PIT_RB_MASK) != PIT_MODE0_16BIT )
>>
>> I think particularly a define for PIT_MODE0_16BIT would be helpful to
>> show what is expected to be the same.
>>
>>> +                return;
>>> +        } while ( val2 & (1 << 6) );
>>
>> I think Roger might have mentioned on an earlier version - would it make
>> sense to have a counter to prevent looping forever?
> 
> Well, as before: The issue with bounding such loops is that the bound is
> going to be entirely arbitrary (and hence easily too large / too small).

Ah, yes.  Your response had slipped my mind.

>> Also, FYI, I tested the series.  My test machine didn't show any aliasing.
> 
> That likely was an AMD one then? It's only Intel chipsets I've seen aliasing
> on so far, but there it's (almost) all of them (with newer data sheets even
> stating that behavior). We could, beyond shim, make the option default in
> patch 1 be "false" for systems with AMD CPUs (on the assumption that those
> wouldn't have Intel chipsets).

Indeed, it was an AMD system, but my sample size is 1.

I didn't realize this was motivated by aliasing being common on Intel 
chipsets.  I think that would be useful to include in the commit messages.

Thanks,
Jason
diff mbox series

Patch

--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -495,7 +495,11 @@  int __init dom0_setup_permissions(struct
     rc |= ioports_deny_access(d, 0x4D0, 0x4D1);
 
     /* Interval Timer (PIT). */
-    rc |= ioports_deny_access(d, 0x40, 0x43);
+    for ( offs = 0, i = ISOLATE_LSB(pit_alias_mask) ?: 4;
+          offs <= pit_alias_mask; offs += i )
+        if ( !(offs & ~pit_alias_mask) )
+            rc |= ioports_deny_access(d, 0x40 + offs, 0x43 + offs);
+
     /* PIT Channel 2 / PC Speaker Control. */
     rc |= ioports_deny_access(d, 0x61, 0x61);
 
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -47,6 +47,7 @@  extern unsigned long highmem_start;
 #endif
 
 extern unsigned int i8259A_alias_mask;
+extern unsigned int pit_alias_mask;
 
 extern int8_t opt_smt;
 extern int8_t opt_probe_port_aliases;
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -425,6 +425,72 @@  static struct platform_timesource __init
     .resume = resume_pit,
 };
 
+unsigned int __initdata pit_alias_mask;
+
+static void __init probe_pit_alias(void)
+{
+    unsigned int mask = 0x1c;
+    uint8_t val = 0;
+
+    if ( !opt_probe_port_aliases )
+        return;
+
+    /*
+     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
+     * count is loaded independent of counting being / becoming enabled.  Thus
+     * we have a 16-bit value fully under our control, to write and then check
+     * whether we can also read it back unaltered.
+     */
+
+    /* Turn off speaker output and disable channel 2 counting. */
+    outb(inb(0x61) & 0x0c, 0x61);
+
+    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
+
+    do {
+        uint8_t val2;
+        unsigned int offs;
+
+        outb(val, PIT_CH2);
+        outb(val ^ 0xff, PIT_CH2);
+
+        /* Wait for the Null Count bit to clear. */
+        do {
+            /* Latch status. */
+            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
+
+            /* Try to make sure we're actually having a PIT here. */
+            val2 = inb(PIT_CH2);
+            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
+                return;
+        } while ( val2 & (1 << 6) );
+
+        /*
+         * Try to further make sure we're actually having a PIT here.
+         *
+         * NB: Deliberately |, not ||, as we always want both reads.
+         */
+        val2 = inb(PIT_CH2);
+        if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) )
+            return;
+
+        for ( offs = ISOLATE_LSB(mask); offs <= mask; offs <<= 1 )
+        {
+            if ( !(mask & offs) )
+                continue;
+            val2 = inb(PIT_CH2 + offs);
+            if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) )
+                mask &= ~offs;
+        }
+    } while ( mask && (val += 0x0b) );  /* Arbitrary uneven number. */
+
+    if ( mask )
+    {
+        dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask);
+        pit_alias_mask = mask;
+    }
+}
+
 /************************************************************
  * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
  */
@@ -2414,6 +2480,8 @@  void __init early_time_init(void)
     }
 
     preinit_pit();
+    probe_pit_alias();
+
     tmp = init_platform_timer();
     plt_tsc.frequency = tmp;