diff mbox series

[1/2] adjust special domain creation (and call it earlier on x86)

Message ID 5CF0F5460200007800233DA8@prv1-mh.provo.novell.com (mailing list archive)
State Superseded
Headers show
Series adjust special domain creation | expand

Commit Message

Jan Beulich May 31, 2019, 9:35 a.m. UTC
Split out this mostly arch-independent code into a common-code helper
function. (This does away with Arm's arch_init_memory() altogether.)

On x86 this needs to happen before acpi_boot_init(): Commit 9fa94e1058
("x86/ACPI: also parse AMD IOMMU tables early") only appeared to work
fine - it's really broken, and doesn't crash (on non-EFI AMD systems)
only because of there being a mapping of linear address 0 during early
boot. On EFI there is:

 Early fatal page fault at e008:ffff82d08024d58e (cr2=0000000000000220, ec=0000)
 ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
 CPU:    0
 RIP:    e008:[<ffff82d08024d58e>] pci.c#_pci_hide_device+0x17/0x3a
 RFLAGS: 0000000000010046   CONTEXT: hypervisor
 rax: 0000000000000000   rbx: 0000000000006000   rcx: 0000000000000000
 rdx: ffff83104f2ee9b0   rsi: ffff82e0209e5d48   rdi: ffff83104f2ee9a0
 rbp: ffff82d08081fce0   rsp: ffff82d08081fcb8   r8:  0000000000000000
 r9:  8000000000000000   r10: 0180000000000000   r11: 7fffffffffffffff
 r12: ffff83104f2ee9a0   r13: 0000000000000002   r14: ffff83104f2ee4b0
 r15: 0000000000000064   cr0: 0000000080050033   cr4: 00000000000000a0
 cr3: 000000009f614000   cr2: 0000000000000220
 fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
 ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
 Xen code around <ffff82d08024d58e> (pci.c#_pci_hide_device+0x17/0x3a):
  48 89 47 38 48 8d 57 10 <48> 8b 88 20 02 00 00 48 89 51 08 48 89 4f 10 48
 Xen stack trace from rsp=ffff82d08081fcb8:
[...]
 Xen call trace:
    [<ffff82d08024d58e>] pci.c#_pci_hide_device+0x17/0x3a
[   [<                >] pci_ro_device+...]
    [<ffff82d080617fe1>] amd_iommu_detect_one_acpi+0x161/0x249
    [<ffff82d0806186ac>] iommu_acpi.c#detect_iommu_acpi+0xb5/0xe7
    [<ffff82d08061cde0>] acpi_table_parse+0x61/0x90
    [<ffff82d080619e7d>] amd_iommu_detect_acpi+0x17/0x19
    [<ffff82d08061790b>] acpi_ivrs_init+0x20/0x5b
    [<ffff82d08062e838>] acpi_boot_init+0x301/0x30f
    [<ffff82d080628b10>] __start_xen+0x1daf/0x28a2
 
 Pagetable walk from 0000000000000220:
  L4[0x000] = 000000009f44f063 ffffffffffffffff
  L3[0x000] = 000000009f44b063 ffffffffffffffff
  L2[0x000] = 0000000000000000 ffffffffffffffff
 
 ****************************************
 Panic on CPU 0:
 FATAL TRAP: vector = 14 (page fault)
 [error_code=0000] , IN INTERRUPT CONTEXT
 ****************************************

Of course the bug would nevertheless have lead to post-boot crashes as
soon as the list would actually get traversed.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Julien Grall May 31, 2019, 10:03 a.m. UTC | #1
Hi Jan,

The Arm code looks good to me. I have few comments regarding the common 
changes.

On 5/31/19 10:35 AM, Jan Beulich wrote:
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -71,6 +71,11 @@ domid_t hardware_domid __read_mostly;
>   integer_param("hardware_dom", hardware_domid);
>   #endif
>   
> +/* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
> +struct domain *__read_mostly dom_xen;
> +struct domain *__read_mostly dom_io;
> +struct domain *__read_mostly dom_cow;

The __read_mostly makes sense here, however please mention it in the 
commit message.

[...]

> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -595,8 +595,6 @@ unsigned int domain_clamp_alloc_bitsize(
>   
>   unsigned long domain_get_maximum_gpfn(struct domain *d);
>   
> -extern struct domain *dom_xen, *dom_io, *dom_cow;	/* for vmcoreinfo */
> -
>   /* Definition of an mm lock: spinlock with extra fields for debugging */
>   typedef struct mm_lock {
>       spinlock_t         lock;
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -5,6 +5,7 @@
>   #include <xen/types.h>
>   
>   #include <public/xen.h>
> +

Without an explanation in the commit message, this looks like a spurious 
change.

>   #include <asm/domain.h>
>   #include <asm/numa.h>

Cheers,
Jan Beulich May 31, 2019, 10:09 a.m. UTC | #2
>>> On 31.05.19 at 12:03, <julien.grall@arm.com> wrote:
> On 5/31/19 10:35 AM, Jan Beulich wrote:
>> --- a/xen/include/xen/domain.h
>> +++ b/xen/include/xen/domain.h
>> @@ -5,6 +5,7 @@
>>   #include <xen/types.h>
>>   
>>   #include <public/xen.h>
>> +
> 
> Without an explanation in the commit message, this looks like a spurious 
> change.

Oh, it indeed is - it's a leftover from where I had first tried to place
the declarations.

Jan
Andrew Cooper May 31, 2019, 3:32 p.m. UTC | #3
On 31/05/2019 02:35, Jan Beulich wrote:
> @@ -516,6 +521,36 @@ struct domain *domain_create(domid_t dom
>      return ERR_PTR(err);
>  }
>  
> +void __init setup_special_domains(void)
> +{
> +    /*
> +     * Initialise our DOMID_XEN domain.
> +     * Any Xen-heap pages that we will allow to be mapped will have
> +     * their domain field set to dom_xen.
> +     * Hidden PCI devices will also be associated with this domain
> +     * (but be [partly] controlled by Dom0 nevertheless).
> +     */
> +    dom_xen = domain_create(DOMID_XEN, NULL, false);
> +    BUG_ON(IS_ERR(dom_xen));

I know this is copying code like-for-like, but this error handling is
terrible in practice.

Even just:

if ( IS_ERR(dom_xen) )
    panic("Failed to create dom_xen: %d\n", PTR_ERR(dom_xen));

would be an improvement.

> +#ifdef CONFIG_HAS_PCI
> +    INIT_LIST_HEAD(&dom_xen->arch.pdev_list);
> +#endif

The position of this identifies that we've got obviously got bugs
(perhaps latent) elsewhere, seeing as other special domains don't get
working constructs such as list_empty().

In the code which currently exists, I can't spot it ever being touched
for ARM, but it is constructed for all non-special x86 guests at the top
of arch_domain_create().

I think the best option, given the #ifdef here, is to reposition the
pdev fields into struct domain, rather than arch_domain, and have this
code fragment near the top of domain_create() where special domains will
all be covered.

~Andrew
Jan Beulich May 31, 2019, 3:59 p.m. UTC | #4
>>> On 31.05.19 at 17:32, <andrew.cooper3@citrix.com> wrote:
> On 31/05/2019 02:35, Jan Beulich wrote:
>> @@ -516,6 +521,36 @@ struct domain *domain_create(domid_t dom
>>      return ERR_PTR(err);
>>  }
>>  
>> +void __init setup_special_domains(void)
>> +{
>> +    /*
>> +     * Initialise our DOMID_XEN domain.
>> +     * Any Xen-heap pages that we will allow to be mapped will have
>> +     * their domain field set to dom_xen.
>> +     * Hidden PCI devices will also be associated with this domain
>> +     * (but be [partly] controlled by Dom0 nevertheless).
>> +     */
>> +    dom_xen = domain_create(DOMID_XEN, NULL, false);
>> +    BUG_ON(IS_ERR(dom_xen));
> 
> I know this is copying code like-for-like, but this error handling is
> terrible in practice.
> 
> Even just:
> 
> if ( IS_ERR(dom_xen) )
>     panic("Failed to create dom_xen: %d\n", PTR_ERR(dom_xen));
> 
> would be an improvement.

I'll be happy to do this; I didn't just because it doesn't really belong
here.

>> +#ifdef CONFIG_HAS_PCI
>> +    INIT_LIST_HEAD(&dom_xen->arch.pdev_list);
>> +#endif
> 
> The position of this identifies that we've got obviously got bugs
> (perhaps latent) elsewhere, seeing as other special domains don't get
> working constructs such as list_empty().
> 
> In the code which currently exists, I can't spot it ever being touched
> for ARM, but it is constructed for all non-special x86 guests at the top
> of arch_domain_create().
> 
> I think the best option, given the #ifdef here, is to reposition the
> pdev fields into struct domain, rather than arch_domain, and have this
> code fragment near the top of domain_create() where special domains will
> all be covered.

Except that if I do this, then not by special casing special domains.
"Normal" domains want this too - the initialization could then be
dropped (moved there) from x86-specific code. But this will want to
be a separate patch then.

Jan
Andrew Cooper May 31, 2019, 4:02 p.m. UTC | #5
On 31/05/2019 08:59, Jan Beulich wrote:
>
>>> +#ifdef CONFIG_HAS_PCI
>>> +    INIT_LIST_HEAD(&dom_xen->arch.pdev_list);
>>> +#endif
>> The position of this identifies that we've got obviously got bugs
>> (perhaps latent) elsewhere, seeing as other special domains don't get
>> working constructs such as list_empty().
>>
>> In the code which currently exists, I can't spot it ever being touched
>> for ARM, but it is constructed for all non-special x86 guests at the top
>> of arch_domain_create().
>>
>> I think the best option, given the #ifdef here, is to reposition the
>> pdev fields into struct domain, rather than arch_domain, and have this
>> code fragment near the top of domain_create() where special domains will
>> all be covered.
> Except that if I do this, then not by special casing special domains.

What special casing?  There is a block of code near the start of
domain_create() which is run for all domain, including special ones.

~Andrew
Jan Beulich June 3, 2019, 8:29 a.m. UTC | #6
>>> On 31.05.19 at 18:02, <andrew.cooper3@citrix.com> wrote:
> On 31/05/2019 08:59, Jan Beulich wrote:
>>
>>>> +#ifdef CONFIG_HAS_PCI
>>>> +    INIT_LIST_HEAD(&dom_xen->arch.pdev_list);
>>>> +#endif
>>> The position of this identifies that we've got obviously got bugs
>>> (perhaps latent) elsewhere, seeing as other special domains don't get
>>> working constructs such as list_empty().
>>>
>>> In the code which currently exists, I can't spot it ever being touched
>>> for ARM, but it is constructed for all non-special x86 guests at the top
>>> of arch_domain_create().
>>>
>>> I think the best option, given the #ifdef here, is to reposition the
>>> pdev fields into struct domain, rather than arch_domain, and have this
>>> code fragment near the top of domain_create() where special domains will
>>> all be covered.
>> Except that if I do this, then not by special casing special domains.
> 
> What special casing?  There is a block of code near the start of
> domain_create() which is run for all domain, including special ones.

Oh, perhaps a misunderstanding on my part then, as you were
saying "where special domains will all be covered", without also
mentioning non-special ones.

Jan
diff mbox series

Patch

--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -42,8 +42,6 @@ 
 #include <xen/libfdt/libfdt.h>
 #include <asm/setup.h>
 
-struct domain *dom_xen, *dom_io, *dom_cow;
-
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef virt_to_mfn
 #define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
@@ -513,32 +511,6 @@  void flush_page_to_ram(unsigned long mfn
         invalidate_icache();
 }
 
-void __init arch_init_memory(void)
-{
-    /*
-     * Initialise our DOMID_XEN domain.
-     * Any Xen-heap pages that we will allow to be mapped will have
-     * their domain field set to dom_xen.
-     */
-    dom_xen = domain_create(DOMID_XEN, NULL, false);
-    BUG_ON(IS_ERR(dom_xen));
-
-    /*
-     * Initialise our DOMID_IO domain.
-     * This domain owns I/O pages that are within the range of the page_info
-     * array. Mappings occur at the priv of the caller.
-     */
-    dom_io = domain_create(DOMID_IO, NULL, false);
-    BUG_ON(IS_ERR(dom_io));
-
-    /*
-     * Initialise our COW domain.
-     * This domain owns sharable pages.
-     */
-    dom_cow = domain_create(DOMID_COW, NULL, false);
-    BUG_ON(IS_ERR(dom_cow));
-}
-
 static inline lpae_t pte_of_xenaddr(vaddr_t va)
 {
     paddr_t ma = va + phys_offset;
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -846,7 +846,7 @@  void __init start_xen(unsigned long boot
 
     rcu_init();
 
-    arch_init_memory();
+    setup_special_domains();
 
     local_irq_enable();
     local_abort_enable();
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -160,9 +160,6 @@  l1_pgentry_t __section(".bss.page_aligne
 
 paddr_t __read_mostly mem_hotplug;
 
-/* Private domain structs for DOMID_XEN and DOMID_IO. */
-struct domain *dom_xen, *dom_io, *dom_cow;
-
 /* Frame table size in pages. */
 unsigned long max_page;
 unsigned long total_pages;
@@ -283,32 +280,6 @@  void __init arch_init_memory(void)
           _PAGE_DIRTY | _PAGE_AVAIL | _PAGE_AVAIL_HIGH | _PAGE_NX);
 
     /*
-     * Initialise our DOMID_XEN domain.
-     * Any Xen-heap pages that we will allow to be mapped will have
-     * their domain field set to dom_xen.
-     * Hidden PCI devices will also be associated with this domain
-     * (but be [partly] controlled by Dom0 nevertheless).
-     */
-    dom_xen = domain_create(DOMID_XEN, NULL, false);
-    BUG_ON(IS_ERR(dom_xen));
-    INIT_LIST_HEAD(&dom_xen->arch.pdev_list);
-
-    /*
-     * Initialise our DOMID_IO domain.
-     * This domain owns I/O pages that are within the range of the page_info
-     * array. Mappings occur at the priv of the caller.
-     */
-    dom_io = domain_create(DOMID_IO, NULL, false);
-    BUG_ON(IS_ERR(dom_io));
-
-    /*
-     * Initialise our COW domain.
-     * This domain owns sharable pages.
-     */
-    dom_cow = domain_create(DOMID_COW, NULL, false);
-    BUG_ON(IS_ERR(dom_cow));
-
-    /*
      * First 1MB of RAM is historically marked as I/O.
      * Note that apart from IO Xen also uses the low 1MB to store the AP boot
      * trampoline and boot information metadata. Due to this always special
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1533,6 +1533,8 @@  void __init noreturn __start_xen(unsigne
     mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
                                   RANGESETF_prettyprint_hex);
 
+    setup_special_domains();
+
     acpi_boot_init();
 
     if ( smp_found_config )
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -71,6 +71,11 @@  domid_t hardware_domid __read_mostly;
 integer_param("hardware_dom", hardware_domid);
 #endif
 
+/* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
+struct domain *__read_mostly dom_xen;
+struct domain *__read_mostly dom_io;
+struct domain *__read_mostly dom_cow;
+
 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
 
 vcpu_info_t dummy_vcpu_info;
@@ -516,6 +521,36 @@  struct domain *domain_create(domid_t dom
     return ERR_PTR(err);
 }
 
+void __init setup_special_domains(void)
+{
+    /*
+     * Initialise our DOMID_XEN domain.
+     * Any Xen-heap pages that we will allow to be mapped will have
+     * their domain field set to dom_xen.
+     * Hidden PCI devices will also be associated with this domain
+     * (but be [partly] controlled by Dom0 nevertheless).
+     */
+    dom_xen = domain_create(DOMID_XEN, NULL, false);
+    BUG_ON(IS_ERR(dom_xen));
+#ifdef CONFIG_HAS_PCI
+    INIT_LIST_HEAD(&dom_xen->arch.pdev_list);
+#endif
+
+    /*
+     * Initialise our DOMID_IO domain.
+     * This domain owns I/O pages that are within the range of the page_info
+     * array. Mappings occur at the priv of the caller.
+     */
+    dom_io = domain_create(DOMID_IO, NULL, false);
+    BUG_ON(IS_ERR(dom_io));
+
+    /*
+     * Initialise our COW domain.
+     * This domain owns sharable pages.
+     */
+    dom_cow = domain_create(DOMID_COW, NULL, false);
+    BUG_ON(IS_ERR(dom_cow));
+}
 
 void domain_update_node_affinity(struct domain *d)
 {
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -334,8 +334,6 @@  long arch_memory_op(int op, XEN_GUEST_HA
 
 unsigned long domain_get_maximum_gpfn(struct domain *d);
 
-extern struct domain *dom_xen, *dom_io, *dom_cow;
-
 #define memguard_guard_stack(_p)       ((void)0)
 #define memguard_guard_range(_p,_l)    ((void)0)
 #define memguard_unguard_range(_p,_l)  ((void)0)
--- a/xen/include/asm-arm/setup.h
+++ b/xen/include/asm-arm/setup.h
@@ -77,8 +77,6 @@  extern struct bootinfo bootinfo;
 
 extern domid_t max_init_domid;
 
-void arch_init_memory(void);
-
 void copy_from_paddr(void *dst, paddr_t paddr, unsigned long len);
 
 size_t estimate_efi_size(int mem_nr_banks);
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -595,8 +595,6 @@  unsigned int domain_clamp_alloc_bitsize(
 
 unsigned long domain_get_maximum_gpfn(struct domain *d);
 
-extern struct domain *dom_xen, *dom_io, *dom_cow;	/* for vmcoreinfo */
-
 /* Definition of an mm lock: spinlock with extra fields for debugging */
 typedef struct mm_lock {
     spinlock_t         lock;
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -5,6 +5,7 @@ 
 #include <xen/types.h>
 
 #include <public/xen.h>
+
 #include <asm/domain.h>
 #include <asm/numa.h>
 
@@ -22,6 +23,8 @@  struct vcpu *alloc_dom0_vcpu0(struct dom
 int vcpu_reset(struct vcpu *);
 int vcpu_up(struct vcpu *v);
 
+void setup_special_domains(void);
+
 struct xen_domctl_getdomaininfo;
 void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
 void arch_get_domain_info(const struct domain *d,
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -642,6 +642,9 @@  static inline void filtered_flush_tlb_ma
     }
 }
 
+/* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
+extern struct domain *dom_xen, *dom_io, *dom_cow;
+
 enum XENSHARE_flags {
     SHARE_rw,
     SHARE_ro,