diff mbox

[v2,for-4.9] x86/mm: Fix incorrect unmapping of 2MB and 1GB pages

Message ID 1494414801-28953-1-git-send-email-igor.druzhinin@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Igor Druzhinin May 10, 2017, 11:13 a.m. UTC
The same set of functions is used to set as well as to clean
P2M entries, except that for clean operations INVALID_MFN (~0UL)
is passed as a parameter. Unfortunately, when calculating an
appropriate target order for a particular mapping INVALID_MFN
is not taken into account which leads to 4K page target order
being set each time even for 2MB and 1GB mappings. This eventually
breaks down an EPT structure irreversibly into 4K mappings which
prevents consecutive high order mappings to this area.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
---
Changes in v2:
* changed mistakenly used mfn_valid() to mfn_eq()
* aggregated gfn-mfn mask into one

CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: George Dunlap <george.dunlap@eu.citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>

Bugfix intended for 4.9 release.
---
 xen/arch/x86/mm/p2m-ept.c |  3 ++-
 xen/arch/x86/mm/p2m.c     | 11 +++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

Comments

Jan Beulich May 10, 2017, 12:08 p.m. UTC | #1
>>> On 10.05.17 at 13:13, <igor.druzhinin@citrix.com> wrote:
> The same set of functions is used to set as well as to clean
> P2M entries, except that for clean operations INVALID_MFN (~0UL)
> is passed as a parameter. Unfortunately, when calculating an
> appropriate target order for a particular mapping INVALID_MFN
> is not taken into account which leads to 4K page target order
> being set each time even for 2MB and 1GB mappings. This eventually
> breaks down an EPT structure irreversibly into 4K mappings which
> prevents consecutive high order mappings to this area.
> 
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
Tian, Kevin May 15, 2017, 1:25 a.m. UTC | #2
> From: Igor Druzhinin [mailto:igor.druzhinin@citrix.com]
> Sent: Wednesday, May 10, 2017 7:13 PM
> 
> The same set of functions is used to set as well as to clean
> P2M entries, except that for clean operations INVALID_MFN (~0UL)
> is passed as a parameter. Unfortunately, when calculating an
> appropriate target order for a particular mapping INVALID_MFN
> is not taken into account which leads to 4K page target order
> being set each time even for 2MB and 1GB mappings. This eventually
> breaks down an EPT structure irreversibly into 4K mappings which
> prevents consecutive high order mappings to this area.
> 
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
George Dunlap May 15, 2017, 1:38 p.m. UTC | #3
On Wed, May 10, 2017 at 12:13 PM, Igor Druzhinin
<igor.druzhinin@citrix.com> wrote:
> The same set of functions is used to set as well as to clean
> P2M entries, except that for clean operations INVALID_MFN (~0UL)
> is passed as a parameter. Unfortunately, when calculating an
> appropriate target order for a particular mapping INVALID_MFN
> is not taken into account which leads to 4K page target order
> being set each time even for 2MB and 1GB mappings. This eventually
> breaks down an EPT structure irreversibly into 4K mappings which
> prevents consecutive high order mappings to this area.
>
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
> ---
> Changes in v2:
> * changed mistakenly used mfn_valid() to mfn_eq()
> * aggregated gfn-mfn mask into one

Acked-by: George Dunlap <george.dunlap@citrix.com>

This will need a release-ack from Julien as well.

 -George


>
> CC: Jun Nakajima <jun.nakajima@intel.com>
> CC: Kevin Tian <kevin.tian@intel.com>
> CC: George Dunlap <george.dunlap@eu.citrix.com>
> CC: Jan Beulich <jbeulich@suse.com>
> CC: Andrew Cooper <andrew.cooper3@citrix.com>
>
> Bugfix intended for 4.9 release.
> ---
>  xen/arch/x86/mm/p2m-ept.c |  3 ++-
>  xen/arch/x86/mm/p2m.c     | 11 +++++++----
>  2 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> index f37a1f2..f98121d 100644
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -681,6 +681,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>      ept_entry_t *table, *ept_entry = NULL;
>      unsigned long gfn_remainder = gfn;
>      unsigned int i, target = order / EPT_TABLE_ORDER;
> +    unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? (gfn | mfn_x(mfn)) : gfn;
>      int ret, rc = 0;
>      bool_t entry_written = 0;
>      bool_t direct_mmio = (p2mt == p2m_mmio_direct);
> @@ -701,7 +702,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>       * 2. gfn not exceeding guest physical address width.
>       * 3. passing a valid order.
>       */
> -    if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
> +    if ( (fn_mask & ((1UL << order) - 1)) ||
>           ((u64)gfn >> ((ept->wl + 1) * EPT_TABLE_ORDER)) ||
>           (order % EPT_TABLE_ORDER) )
>          return -EINVAL;
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index ae70a92..e902f1a 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -543,12 +543,15 @@ int p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>      while ( todo )
>      {
>          if ( hap_enabled(d) )
> -            order = (!((gfn | mfn_x(mfn) | todo) &
> -                       ((1ul << PAGE_ORDER_1G) - 1)) &&
> +        {
> +            unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ?
> +                                     (gfn | mfn_x(mfn) | todo) : (gfn | todo);
> +
> +            order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
>                       hap_has_1gb) ? PAGE_ORDER_1G :
> -                    (!((gfn | mfn_x(mfn) | todo) &
> -                       ((1ul << PAGE_ORDER_2M) - 1)) &&
> +                    (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
>                       hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
> +        }
>          else
>              order = 0;
>
> --
> 2.7.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel
Julien Grall May 17, 2017, 2:26 p.m. UTC | #4
Hello,

Sorry I missed that patch. In the future, please CC me if you want a 
patch to go in Xen 4.9.

On 10/05/17 12:13, Igor Druzhinin wrote:
> The same set of functions is used to set as well as to clean
> P2M entries, except that for clean operations INVALID_MFN (~0UL)
> is passed as a parameter. Unfortunately, when calculating an
> appropriate target order for a particular mapping INVALID_MFN
> is not taken into account which leads to 4K page target order
> being set each time even for 2MB and 1GB mappings. This eventually
> breaks down an EPT structure irreversibly into 4K mappings which
> prevents consecutive high order mappings to this area.

It sounds like we have a similar issue on ARM as I wrote the 
implementation based on the x86 code. I will send a patch.

>
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>

Release-acked-by: Julien Grall <julien.grall@arm.com>

Cheers,
Boris Ostrovsky May 22, 2017, 6:01 p.m. UTC | #5
On 05/10/2017 07:13 AM, Igor Druzhinin wrote:
> The same set of functions is used to set as well as to clean
> P2M entries, except that for clean operations INVALID_MFN (~0UL)
> is passed as a parameter. Unfortunately, when calculating an
> appropriate target order for a particular mapping INVALID_MFN
> is not taken into account which leads to 4K page target order
> being set each time even for 2MB and 1GB mappings. This eventually
> breaks down an EPT structure irreversibly into 4K mappings which
> prevents consecutive high order mappings to this area.
>
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>

This patch breaks HVM/PVH on AMD when maxmem > memory.

-boris
Jan Beulich May 23, 2017, 8:07 a.m. UTC | #6
>>> On 22.05.17 at 20:01, <boris.ostrovsky@oracle.com> wrote:
> On 05/10/2017 07:13 AM, Igor Druzhinin wrote:
>> The same set of functions is used to set as well as to clean
>> P2M entries, except that for clean operations INVALID_MFN (~0UL)
>> is passed as a parameter. Unfortunately, when calculating an
>> appropriate target order for a particular mapping INVALID_MFN
>> is not taken into account which leads to 4K page target order
>> being set each time even for 2MB and 1GB mappings. This eventually
>> breaks down an EPT structure irreversibly into 4K mappings which
>> prevents consecutive high order mappings to this area.
>>
>> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
> 
> This patch breaks HVM/PVH on AMD when maxmem > memory.

To be honest, a little more detail might help addressing the issue
in a timely manner. I'd suppose you know more than just "breaks"
...

Jan
Boris Ostrovsky May 23, 2017, 1 p.m. UTC | #7
On 05/23/2017 04:07 AM, Jan Beulich wrote:
>>>> On 22.05.17 at 20:01, <boris.ostrovsky@oracle.com> wrote:
>> On 05/10/2017 07:13 AM, Igor Druzhinin wrote:
>>> The same set of functions is used to set as well as to clean
>>> P2M entries, except that for clean operations INVALID_MFN (~0UL)
>>> is passed as a parameter. Unfortunately, when calculating an
>>> appropriate target order for a particular mapping INVALID_MFN
>>> is not taken into account which leads to 4K page target order
>>> being set each time even for 2MB and 1GB mappings. This eventually
>>> breaks down an EPT structure irreversibly into 4K mappings which
>>> prevents consecutive high order mappings to this area.
>>>
>>> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
>> This patch breaks HVM/PVH on AMD when maxmem > memory.
> To be honest, a little more detail might help addressing the issue
> in a timely manner. I'd suppose you know more than just "breaks"
> ...

Uhm.. yes, this was somewhat lacking in details, sorry. I poked a bit at
this yesterday but haven't found anything obvious yet.

So with HVM:

builder = "hvm"
name = "fedora"
memory=1024
maxmem=2048
vcpus = 2
vif=['mac=00:21:F6:00:01:DC,bridge=xenbr0']
disk = [ '/root/virt/fedora2.img,raw,hda,rw' ]
vnc = 1
boot="dc"
vncdisplay=1
serial='pty'

(d1) HVM Loader
(d1) Detected Xen v4.9-rc
(d1) Xenbus rings @0xfeffc000, event channel 1
(d1) System requested SeaBIOS
(d1) CPU speed is 2494 MHz
(d1) Relocating guest memory for lowmem MMIO space disabled
(XEN) irq.c:285: Dom1 PCI link 0 changed 0 -> 5
(d1) PCI-ISA link 0 routed to IRQ5
(XEN) irq.c:285: Dom1 PCI link 1 changed 0 -> 10
(d1) PCI-ISA link 1 routed to IRQ10
(XEN) irq.c:285: Dom1 PCI link 2 changed 0 -> 11
(d1) PCI-ISA link 2 routed to IRQ11
(XEN) irq.c:285: Dom1 PCI link 3 changed 0 -> 5
(d1) PCI-ISA link 3 routed to IRQ5
(d1) pci dev 01:3 INTA->IRQ10
(d1) pci dev 02:0 INTA->IRQ11
(d1) pci dev 04:0 INTA->IRQ5
(d1) No RAM in high memory; setting high_mem resource base to 100000000
(d1) pci dev 03:0 bar 10 size 002000000: 0f0000008
(d1) pci dev 02:0 bar 14 size 001000000: 0f2000008
(d1) pci dev 04:0 bar 30 size 000040000: 0f3000000
(d1) pci dev 03:0 bar 30 size 000010000: 0f3040000
(d1) pci dev 03:0 bar 14 size 000001000: 0f3050000
(d1) pci dev 02:0 bar 10 size 000000100: 00000c001
(d1) pci dev 04:0 bar 10 size 000000100: 00000c101
(d1) pci dev 04:0 bar 14 size 000000100: 0f3051000
(d1) pci dev 01:1 bar 20 size 000000010: 00000c201
(d1) Multiprocessor initialisation:
(d1)  - CPU0 ... 48-bit phys ... fixed MTRRs ... var MTRRs [1/8] ... done.
(d1)  - CPU1 ... 48-bit phys ... fixed MTRRs ... var MTRRs [1/8] ... done.
(d1) Testing HVM environment:
(XEN) d1v0 Triple fault - invoking HVM shutdown action 1
(XEN) *** Dumping Dom1 vcpu#0 state: ***
(XEN) ----[ Xen-4.9-rc  x86_64  debug=y   Tainted:  C   ]----
(XEN) CPU:    11
(XEN) RIP:    0018:[<000000000010815c>]
(XEN) RFLAGS: 0000000000000086   CONTEXT: hvm guest (d1v0)
(XEN) rax: 0000000080000011   rbx: 000000000017c000   rcx: 0000000000003000
(XEN) rdx: 00000000ffefffff   rsi: 0000000000000000   rdi: 0000000000000000
(XEN) rbp: 0000000000136478   rsp: 0000000000136478   r8:  0000000000000000
(XEN) r9:  0000000000000000   r10: 0000000000000000   r11: 0000000000000000
(XEN) r12: 0000000000000000   r13: 0000000000000000   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000080000011   cr4: 0000000000000000
(XEN) cr3: 0000000000800000   cr2: 000000000010815c
(XEN) ds: 0020   es: 0020   fs: 0020   gs: 0020   ss: 0020   cs: 0018


0x10815c is tools/firmware/hvmloader/tests.c:start_paging(), the 'jmp'
after we enable paging (load cr0 with bit 31 set).


With PVH:

kernel="/root/linux/vmlinux"
extra="root=/dev/xvda2 debug earlyprintk=xen console=hvc0"
memory=1024
maxmem=12000
vcpus=2
maxvcpus=3
builder="hvm"
device_model_version="none"
name = "pvh"
on_crash="preserve"
disk=['/root/virt/fedora.img,raw,hda,rw']

root@ovs101> xl create -c ~/virt/pvh.conf
Parsing config from /root/virt/pvh.conf
libxl: notice: libxl_numa.c:518:libxl__get_numa_candidate: NUMA
placement failed, performance might be affected
S3 disabled
S4 disabled
CONV disabled
xc: error: panic: xc_dom_boot.c:178: xc_dom_boot_domU_map: failed to
mmap domU pages 0x1000+0x1062 [mmap, errno=22 (Invalid argument)]:
Internal error
libxl: error: libxl_dom.c:679:libxl__build_dom: xc_dom_build_image
failed: Invalid argument
libxl: error: libxl_create.c:1217:domcreate_rebuild_done: Domain
4:cannot (re-)build domain: -3
libxl: error: libxl_domain.c:1003:libxl__destroy_domid: Domain
4:Non-existant domain
libxl: error: libxl_domain.c:962:domain_destroy_callback: Domain
4:Unable to destroy guest
libxl: error: libxl_domain.c:889:domain_destroy_cb: Domain 4:Destruction
of domain failed
root@ovs101>
Jan Beulich May 23, 2017, 1:17 p.m. UTC | #8
>>> On 23.05.17 at 15:00, <boris.ostrovsky@oracle.com> wrote:
> (d1) Testing HVM environment:
> (XEN) d1v0 Triple fault - invoking HVM shutdown action 1
> (XEN) *** Dumping Dom1 vcpu#0 state: ***
> (XEN) ----[ Xen-4.9-rc  x86_64  debug=y   Tainted:  C   ]----
> (XEN) CPU:    11
> (XEN) RIP:    0018:[<000000000010815c>]
> (XEN) RFLAGS: 0000000000000086   CONTEXT: hvm guest (d1v0)
> (XEN) rax: 0000000080000011   rbx: 000000000017c000   rcx: 0000000000003000
> (XEN) rdx: 00000000ffefffff   rsi: 0000000000000000   rdi: 0000000000000000
> (XEN) rbp: 0000000000136478   rsp: 0000000000136478   r8:  0000000000000000
> (XEN) r9:  0000000000000000   r10: 0000000000000000   r11: 0000000000000000
> (XEN) r12: 0000000000000000   r13: 0000000000000000   r14: 0000000000000000
> (XEN) r15: 0000000000000000   cr0: 0000000080000011   cr4: 0000000000000000
> (XEN) cr3: 0000000000800000   cr2: 000000000010815c
> (XEN) ds: 0020   es: 0020   fs: 0020   gs: 0020   ss: 0020   cs: 0018
> 
> 
> 0x10815c is tools/firmware/hvmloader/tests.c:start_paging(), the 'jmp'
> after we enable paging (load cr0 with bit 31 set).

Odd. Suggests page tables are completely screwed.

> root@ovs101> xl create -c ~/virt/pvh.conf
> Parsing config from /root/virt/pvh.conf
> libxl: notice: libxl_numa.c:518:libxl__get_numa_candidate: NUMA
> placement failed, performance might be affected
> S3 disabled
> S4 disabled
> CONV disabled
> xc: error: panic: xc_dom_boot.c:178: xc_dom_boot_domU_map: failed to
> mmap domU pages 0x1000+0x1062 [mmap, errno=22 (Invalid argument)]:
> Internal error

This is even more strange. I can't seem to make a connection to
the changes in said commit at all. And I did go through p2m-pt.c's
relevant code another time this morning, without spotting any
possible oversight by Igor. IOW I'm now really curious what it is
that I'm not seeing (and that's apparently NPT-specific).

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index f37a1f2..f98121d 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -681,6 +681,7 @@  ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     ept_entry_t *table, *ept_entry = NULL;
     unsigned long gfn_remainder = gfn;
     unsigned int i, target = order / EPT_TABLE_ORDER;
+    unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? (gfn | mfn_x(mfn)) : gfn;
     int ret, rc = 0;
     bool_t entry_written = 0;
     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
@@ -701,7 +702,7 @@  ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
      * 2. gfn not exceeding guest physical address width.
      * 3. passing a valid order.
      */
-    if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
+    if ( (fn_mask & ((1UL << order) - 1)) ||
          ((u64)gfn >> ((ept->wl + 1) * EPT_TABLE_ORDER)) ||
          (order % EPT_TABLE_ORDER) )
         return -EINVAL;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index ae70a92..e902f1a 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -543,12 +543,15 @@  int p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     while ( todo )
     {
         if ( hap_enabled(d) )
-            order = (!((gfn | mfn_x(mfn) | todo) &
-                       ((1ul << PAGE_ORDER_1G) - 1)) &&
+        {
+            unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ?
+                                     (gfn | mfn_x(mfn) | todo) : (gfn | todo);
+
+            order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
                      hap_has_1gb) ? PAGE_ORDER_1G :
-                    (!((gfn | mfn_x(mfn) | todo) &
-                       ((1ul << PAGE_ORDER_2M) - 1)) &&
+                    (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
                      hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
+        }
         else
             order = 0;