diff mbox series

[v2,3/5] arm/dom0less: put dom0less feature code in a separate module

Message ID 20230927140133.631192-4-luca.fancellu@arm.com (mailing list archive)
State Superseded
Headers show
Series Fine granular configuration | expand

Commit Message

Luca Fancellu Sept. 27, 2023, 2:01 p.m. UTC
Currently the dom0less feature code is mostly inside domain_build.c
and setup.c, it is a feature that may not be useful to everyone so
put the code in a different compilation module in order to make it
easier to disable the feature in the future.

Move gic_interrupt_t in domain_build.h to use it with the function
declaration, move its comment above the declaration.

The following functions are now visible externally from domain_build
because they are used also from the dom0less-build module:
 - get_allocation_size
 - set_interrupt
 - domain_fdt_begin_node
 - make_memory_node
 - make_resv_memory_node
 - make_hypervisor_node
 - make_psci_node
 - make_cpus_node
 - make_timer_node
 - handle_device_interrupts
 - construct_domain
 - process_shm

The functions allocate_static_memory and assign_static_memory_11
are now externally visible, so put their declarations into
domain_build.h and move the #else and stub definition in the header
as well.

Move is_dom0less_mode from setup.c to dom0less-build.c and make it
externally visible.

Where spotted, fix code style issues.

No functional change is intended.

Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
---
 xen/arch/arm/Makefile                     |    1 +
 xen/arch/arm/dom0less-build.c             | 1087 ++++++++++++++++++
 xen/arch/arm/domain_build.c               | 1276 ++-------------------
 xen/arch/arm/include/asm/dom0less-build.h |   25 +
 xen/arch/arm/include/asm/domain_build.h   |   58 +
 xen/arch/arm/include/asm/setup.h          |    1 -
 xen/arch/arm/setup.c                      |   33 +-
 7 files changed, 1288 insertions(+), 1193 deletions(-)
 create mode 100644 xen/arch/arm/dom0less-build.c
 create mode 100644 xen/arch/arm/include/asm/dom0less-build.h

Comments

Stefano Stabellini Sept. 29, 2023, 12:33 a.m. UTC | #1
On Wed, 27 Sep 2023, Luca Fancellu wrote:
> Currently the dom0less feature code is mostly inside domain_build.c
> and setup.c, it is a feature that may not be useful to everyone so
> put the code in a different compilation module in order to make it
> easier to disable the feature in the future.
> 
> Move gic_interrupt_t in domain_build.h to use it with the function
> declaration, move its comment above the declaration.
> 
> The following functions are now visible externally from domain_build
> because they are used also from the dom0less-build module:
>  - get_allocation_size
>  - set_interrupt
>  - domain_fdt_begin_node
>  - make_memory_node
>  - make_resv_memory_node
>  - make_hypervisor_node
>  - make_psci_node
>  - make_cpus_node
>  - make_timer_node
>  - handle_device_interrupts
>  - construct_domain
>  - process_shm
> 
> The functions allocate_static_memory and assign_static_memory_11
> are now externally visible, so put their declarations into
> domain_build.h and move the #else and stub definition in the header
> as well.
> 
> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
> externally visible.
> 
> Where spotted, fix code style issues.
> 
> No functional change is intended.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>

This is great! A couple of questions.

Why was allocate_static_memory not moved to dom0less-build.c ?

Would it make sense to also move construct_dom0 to dom0less-build.c
given the similarities with construct_domU? I am not sure about this.



> ---
>  xen/arch/arm/Makefile                     |    1 +
>  xen/arch/arm/dom0less-build.c             | 1087 ++++++++++++++++++
>  xen/arch/arm/domain_build.c               | 1276 ++-------------------
>  xen/arch/arm/include/asm/dom0less-build.h |   25 +
>  xen/arch/arm/include/asm/domain_build.h   |   58 +
>  xen/arch/arm/include/asm/setup.h          |    1 -
>  xen/arch/arm/setup.c                      |   33 +-
>  7 files changed, 1288 insertions(+), 1193 deletions(-)
>  create mode 100644 xen/arch/arm/dom0less-build.c
>  create mode 100644 xen/arch/arm/include/asm/dom0less-build.h
> 
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index 81c31c36fc3d..70dd7201ef30 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -15,6 +15,7 @@ obj-y += cpufeature.o
>  obj-y += decode.o
>  obj-y += device.o
>  obj-$(CONFIG_IOREQ_SERVER) += dm.o
> +obj-y += dom0less-build.init.o
>  obj-y += domain.o
>  obj-y += domain_build.init.o
>  obj-$(CONFIG_ARCH_MAP_DOMAIN_PAGE) += domain_page.o
> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> new file mode 100644
> index 000000000000..dc9c90cf00a7
> --- /dev/null
> +++ b/xen/arch/arm/dom0less-build.c
> @@ -0,0 +1,1087 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * xen/arch/arm/dom0less-build.c
> + *
> + * Code related to the dom0less functionality
> + *
> + * Copyright (C) 2023 Arm Ltd.
> + */
> +
> +#include <xen/device_tree.h>
> +#include <xen/err.h>
> +#include <xen/event.h>
> +#include <xen/grant_table.h>
> +#include <xen/iocap.h>
> +#include <xen/libfdt/libfdt.h>
> +#include <xen/sched.h>
> +#include <xen/serial.h>
> +#include <xen/sizes.h>
> +#include <xen/vmap.h>
> +
> +#include <asm/arm64/sve.h>
> +#include <asm/dom0less-build.h>
> +#include <asm/domain_build.h>
> +
> +bool __init is_dom0less_mode(void)
> +{
> +    struct bootmodules *mods = &bootinfo.modules;
> +    struct bootmodule *mod;
> +    unsigned int i;
> +    bool dom0found = false;
> +    bool domUfound = false;
> +
> +    /* Look into the bootmodules */
> +    for ( i = 0 ; i < mods->nr_mods ; i++ )
> +    {
> +        mod = &mods->module[i];
> +        /* Find if dom0 and domU kernels are present */
> +        if ( mod->kind == BOOTMOD_KERNEL )
> +        {
> +            if ( mod->domU == false )
> +            {
> +                dom0found = true;
> +                break;
> +            }
> +            else
> +                domUfound = true;
> +        }
> +    }
> +
> +    /*
> +     * If there is no dom0 kernel but at least one domU, then we are in
> +     * dom0less mode
> +     */
> +    return ( !dom0found && domUfound );
> +}
> +
> +static bool __init allocate_bank_memory(struct domain *d,
> +                                        struct kernel_info *kinfo,
> +                                        gfn_t sgfn,
> +                                        paddr_t tot_size)
> +{
> +    int res;
> +    struct page_info *pg;
> +    struct membank *bank;
> +    unsigned int max_order = ~0;
> +
> +    /*
> +     * allocate_bank_memory can be called with a tot_size of zero for
> +     * the second memory bank. It is not an error and we can safely
> +     * avoid creating a zero-size memory bank.
> +     */
> +    if ( tot_size == 0 )
> +        return true;
> +
> +    bank = &kinfo->mem.bank[kinfo->mem.nr_banks];
> +    bank->start = gfn_to_gaddr(sgfn);
> +    bank->size = tot_size;
> +
> +    while ( tot_size > 0 )
> +    {
> +        unsigned int order = get_allocation_size(tot_size);
> +
> +        order = min(max_order, order);
> +
> +        pg = alloc_domheap_pages(d, order, 0);
> +        if ( !pg )
> +        {
> +            /*
> +             * If we can't allocate one page, then it is unlikely to
> +             * succeed in the next iteration. So bail out.
> +             */
> +            if ( !order )
> +                return false;
> +
> +            /*
> +             * If we can't allocate memory with order, then it is
> +             * unlikely to succeed in the next iteration.
> +             * Record the order - 1 to avoid re-trying.
> +             */
> +            max_order = order - 1;
> +            continue;
> +        }
> +
> +        res = guest_physmap_add_page(d, sgfn, page_to_mfn(pg), order);
> +        if ( res )
> +        {
> +            dprintk(XENLOG_ERR, "Failed map pages to DOMU: %d", res);
> +            return false;
> +        }
> +
> +        sgfn = gfn_add(sgfn, 1UL << order);
> +        tot_size -= (1ULL << (PAGE_SHIFT + order));
> +    }
> +
> +    kinfo->mem.nr_banks++;
> +    kinfo->unassigned_mem -= bank->size;
> +
> +    return true;
> +}
> +
> +static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo)
> +{
> +    unsigned int i;
> +    paddr_t bank_size;
> +
> +    printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n",
> +           /* Don't want format this as PRIpaddr (16 digit hex) */
> +           (unsigned long)(kinfo->unassigned_mem >> 20), d);
> +
> +    kinfo->mem.nr_banks = 0;
> +    bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem);
> +    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM0_BASE),
> +                               bank_size) )
> +        goto fail;
> +
> +    bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem);
> +    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM1_BASE),
> +                               bank_size) )
> +        goto fail;
> +
> +    if ( kinfo->unassigned_mem )
> +        goto fail;
> +
> +    for( i = 0; i < kinfo->mem.nr_banks; i++ )
> +    {
> +        printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
> +               d,
> +               i,
> +               kinfo->mem.bank[i].start,
> +               kinfo->mem.bank[i].start + kinfo->mem.bank[i].size,
> +               /* Don't want format this as PRIpaddr (16 digit hex) */
> +               (unsigned long)(kinfo->mem.bank[i].size >> 20));
> +    }
> +
> +    return;
> +
> +fail:
> +    panic("Failed to allocate requested domain memory."
> +          /* Don't want format this as PRIpaddr (16 digit hex) */
> +          " %ldKB unallocated. Fix the VMs configurations.\n",
> +          (unsigned long)kinfo->unassigned_mem >> 10);
> +}
> +
> +#ifdef CONFIG_VGICV2
> +static int __init make_gicv2_domU_node(struct kernel_info *kinfo)
> +{
> +    void *fdt = kinfo->fdt;
> +    int res = 0;
> +    __be32 reg[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) * 2];
> +    __be32 *cells;
> +    const struct domain *d = kinfo->d;
> +
> +    res = domain_fdt_begin_node(fdt, "interrupt-controller",
> +                                vgic_dist_base(&d->arch.vgic));
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "#address-cells", 0);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_string(fdt, "compatible", "arm,gic-400");
> +    if ( res )
> +        return res;
> +
> +    cells = &reg[0];
> +    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> +                       vgic_dist_base(&d->arch.vgic), GUEST_GICD_SIZE);
> +    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> +                       vgic_cpu_base(&d->arch.vgic), GUEST_GICC_SIZE);
> +
> +    res = fdt_property(fdt, "reg", reg, sizeof(reg));
> +    if (res)
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
> +    if (res)
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
> +    if (res)
> +        return res;
> +
> +    res = fdt_end_node(fdt);
> +
> +    return res;
> +}
> +#endif
> +
> +#ifdef CONFIG_GICV3
> +static int __init make_gicv3_domU_node(struct kernel_info *kinfo)
> +{
> +    void *fdt = kinfo->fdt;
> +    int res = 0;
> +    __be32 *reg, *cells;
> +    const struct domain *d = kinfo->d;
> +    unsigned int i, len = 0;
> +
> +    res = domain_fdt_begin_node(fdt, "interrupt-controller",
> +                                vgic_dist_base(&d->arch.vgic));
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "#address-cells", 0);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_string(fdt, "compatible", "arm,gic-v3");
> +    if ( res )
> +        return res;
> +
> +    /* reg specifies all re-distributors and Distributor. */
> +    len = (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
> +          (d->arch.vgic.nr_regions + 1) * sizeof(__be32);
> +    reg = xmalloc_bytes(len);
> +    if ( reg == NULL )
> +        return -ENOMEM;
> +    cells = reg;
> +
> +    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> +                       vgic_dist_base(&d->arch.vgic), GUEST_GICV3_GICD_SIZE);
> +
> +    for ( i = 0; i < d->arch.vgic.nr_regions; i++ )
> +        dt_child_set_range(&cells,
> +                           GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> +                           d->arch.vgic.rdist_regions[i].base,
> +                           d->arch.vgic.rdist_regions[i].size);
> +
> +    res = fdt_property(fdt, "reg", reg, len);
> +    xfree(reg);
> +    if (res)
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
> +    if (res)
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
> +    if (res)
> +        return res;
> +
> +    res = fdt_end_node(fdt);
> +
> +    return res;
> +}
> +#endif
> +
> +static int __init make_gic_domU_node(struct kernel_info *kinfo)
> +{
> +    switch ( kinfo->d->arch.vgic.version )
> +    {
> +#ifdef CONFIG_GICV3
> +    case GIC_V3:
> +        return make_gicv3_domU_node(kinfo);
> +#endif
> +#ifdef CONFIG_VGICV2
> +    case GIC_V2:
> +        return make_gicv2_domU_node(kinfo);
> +#endif
> +    default:
> +        panic("Unsupported GIC version\n");
> +    }
> +}
> +
> +#ifdef CONFIG_SBSA_VUART_CONSOLE
> +static int __init make_vpl011_uart_node(struct kernel_info *kinfo)
> +{
> +    void *fdt = kinfo->fdt;
> +    int res;
> +    gic_interrupt_t intr;
> +    __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
> +    __be32 *cells;
> +    struct domain *d = kinfo->d;
> +
> +    res = domain_fdt_begin_node(fdt, "sbsa-uart", d->arch.vpl011.base_addr);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_string(fdt, "compatible", "arm,sbsa-uart");
> +    if ( res )
> +        return res;
> +
> +    cells = &reg[0];
> +    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS,
> +                       GUEST_ROOT_SIZE_CELLS, d->arch.vpl011.base_addr,
> +                       GUEST_PL011_SIZE);
> +
> +    res = fdt_property(fdt, "reg", reg, sizeof(reg));
> +    if ( res )
> +        return res;
> +
> +    set_interrupt(intr, d->arch.vpl011.virq, 0xf, DT_IRQ_TYPE_LEVEL_HIGH);
> +
> +    res = fdt_property(fdt, "interrupts", intr, sizeof (intr));
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_cell(fdt, "interrupt-parent",
> +                            kinfo->phandle_gic);
> +    if ( res )
> +        return res;
> +
> +    /* Use a default baud rate of 115200. */
> +    fdt_property_u32(fdt, "current-speed", 115200);
> +
> +    res = fdt_end_node(fdt);
> +    if ( res )
> +        return res;
> +
> +    return 0;
> +}
> +#endif
> +
> +/*
> + * Scan device tree properties for passthrough specific information.
> + * Returns < 0 on error
> + *         0 on success
> + */
> +static int __init handle_passthrough_prop(struct kernel_info *kinfo,
> +                                          const struct fdt_property *xen_reg,
> +                                          const struct fdt_property *xen_path,
> +                                          bool xen_force,
> +                                          uint32_t address_cells,
> +                                          uint32_t size_cells)
> +{
> +    const __be32 *cell;
> +    unsigned int i, len;
> +    struct dt_device_node *node;
> +    int res;
> +    paddr_t mstart, size, gstart;
> +
> +    /* xen,reg specifies where to map the MMIO region */
> +    cell = (const __be32 *)xen_reg->data;
> +    len = fdt32_to_cpu(xen_reg->len) / ((address_cells * 2 + size_cells) *
> +                                        sizeof(uint32_t));
> +
> +    for ( i = 0; i < len; i++ )
> +    {
> +        device_tree_get_reg(&cell, address_cells, size_cells,
> +                            &mstart, &size);
> +        gstart = dt_next_cell(address_cells, &cell);
> +
> +        if ( gstart & ~PAGE_MASK || mstart & ~PAGE_MASK || size & ~PAGE_MASK )
> +        {
> +            printk(XENLOG_ERR
> +                   "DomU passthrough config has not page aligned addresses/sizes\n");
> +            return -EINVAL;
> +        }
> +
> +        res = iomem_permit_access(kinfo->d, paddr_to_pfn(mstart),
> +                                  paddr_to_pfn(PAGE_ALIGN(mstart + size - 1)));
> +        if ( res )
> +        {
> +            printk(XENLOG_ERR "Unable to permit to dom%d access to"
> +                   " 0x%"PRIpaddr" - 0x%"PRIpaddr"\n",
> +                   kinfo->d->domain_id,
> +                   mstart & PAGE_MASK, PAGE_ALIGN(mstart + size) - 1);
> +            return res;
> +        }
> +
> +        res = map_regions_p2mt(kinfo->d,
> +                               gaddr_to_gfn(gstart),
> +                               PFN_DOWN(size),
> +                               maddr_to_mfn(mstart),
> +                               p2m_mmio_direct_dev);
> +        if ( res < 0 )
> +        {
> +            printk(XENLOG_ERR
> +                   "Failed to map %"PRIpaddr" to the guest at%"PRIpaddr"\n",
> +                   mstart, gstart);
> +            return -EFAULT;
> +        }
> +    }
> +
> +    /*
> +     * If xen_force, we let the user assign a MMIO region with no
> +     * associated path.
> +     */
> +    if ( xen_path == NULL )
> +        return xen_force ? 0 : -EINVAL;
> +
> +    /*
> +     * xen,path specifies the corresponding node in the host DT.
> +     * Both interrupt mappings and IOMMU settings are based on it,
> +     * as they are done based on the corresponding host DT node.
> +     */
> +    node = dt_find_node_by_path(xen_path->data);
> +    if ( node == NULL )
> +    {
> +        printk(XENLOG_ERR "Couldn't find node %s in host_dt!\n",
> +               (char *)xen_path->data);
> +        return -EINVAL;
> +    }
> +
> +    res = map_device_irqs_to_domain(kinfo->d, node, true, NULL);
> +    if ( res < 0 )
> +        return res;
> +
> +    res = iommu_add_dt_device(node);
> +    if ( res < 0 )
> +        return res;
> +
> +    /* If xen_force, we allow assignment of devices without IOMMU protection. */
> +    if ( xen_force && !dt_device_is_protected(node) )
> +        return 0;
> +
> +    return iommu_assign_dt_device(kinfo->d, node);
> +}
> +
> +static int __init handle_prop_pfdt(struct kernel_info *kinfo,
> +                                   const void *pfdt, int nodeoff,
> +                                   uint32_t address_cells, uint32_t size_cells,
> +                                   bool scan_passthrough_prop)
> +{
> +    void *fdt = kinfo->fdt;
> +    int propoff, nameoff, res;
> +    const struct fdt_property *prop, *xen_reg = NULL, *xen_path = NULL;
> +    const char *name;
> +    bool found, xen_force = false;
> +
> +    for ( propoff = fdt_first_property_offset(pfdt, nodeoff);
> +          propoff >= 0;
> +          propoff = fdt_next_property_offset(pfdt, propoff) )
> +    {
> +        if ( !(prop = fdt_get_property_by_offset(pfdt, propoff, NULL)) )
> +            return -FDT_ERR_INTERNAL;
> +
> +        found = false;
> +        nameoff = fdt32_to_cpu(prop->nameoff);
> +        name = fdt_string(pfdt, nameoff);
> +
> +        if ( scan_passthrough_prop )
> +        {
> +            if ( dt_prop_cmp("xen,reg", name) == 0 )
> +            {
> +                xen_reg = prop;
> +                found = true;
> +            }
> +            else if ( dt_prop_cmp("xen,path", name) == 0 )
> +            {
> +                xen_path = prop;
> +                found = true;
> +            }
> +            else if ( dt_prop_cmp("xen,force-assign-without-iommu",
> +                                  name) == 0 )
> +            {
> +                xen_force = true;
> +                found = true;
> +            }
> +        }
> +
> +        /*
> +         * Copy properties other than the ones above: xen,reg, xen,path,
> +         * and xen,force-assign-without-iommu.
> +         */
> +        if ( !found )
> +        {
> +            res = fdt_property(fdt, name, prop->data, fdt32_to_cpu(prop->len));
> +            if ( res )
> +                return res;
> +        }
> +    }
> +
> +    /*
> +     * Only handle passthrough properties if both xen,reg and xen,path
> +     * are present, or if xen,force-assign-without-iommu is specified.
> +     */
> +    if ( xen_reg != NULL && (xen_path != NULL || xen_force) )
> +    {
> +        res = handle_passthrough_prop(kinfo, xen_reg, xen_path, xen_force,
> +                                      address_cells, size_cells);
> +        if ( res < 0 )
> +        {
> +            printk(XENLOG_ERR "Failed to assign device to %pd\n", kinfo->d);
> +            return res;
> +        }
> +    }
> +    else if ( (xen_path && !xen_reg) || (xen_reg && !xen_path && !xen_force) )
> +    {
> +        printk(XENLOG_ERR "xen,reg or xen,path missing for %pd\n",
> +               kinfo->d);
> +        return -EINVAL;
> +    }
> +
> +    /* FDT_ERR_NOTFOUND => There is no more properties for this node */
> +    return ( propoff != -FDT_ERR_NOTFOUND ) ? propoff : 0;
> +}
> +
> +static int __init scan_pfdt_node(struct kernel_info *kinfo, const void *pfdt,
> +                                 int nodeoff,
> +                                 uint32_t address_cells, uint32_t size_cells,
> +                                 bool scan_passthrough_prop)
> +{
> +    int rc = 0;
> +    void *fdt = kinfo->fdt;
> +    int node_next;
> +
> +    rc = fdt_begin_node(fdt, fdt_get_name(pfdt, nodeoff, NULL));
> +    if ( rc )
> +        return rc;
> +
> +    rc = handle_prop_pfdt(kinfo, pfdt, nodeoff, address_cells, size_cells,
> +                          scan_passthrough_prop);
> +    if ( rc )
> +        return rc;
> +
> +    address_cells = device_tree_get_u32(pfdt, nodeoff, "#address-cells",
> +                                        DT_ROOT_NODE_ADDR_CELLS_DEFAULT);
> +    size_cells = device_tree_get_u32(pfdt, nodeoff, "#size-cells",
> +                                     DT_ROOT_NODE_SIZE_CELLS_DEFAULT);
> +
> +    node_next = fdt_first_subnode(pfdt, nodeoff);
> +    while ( node_next > 0 )
> +    {
> +        scan_pfdt_node(kinfo, pfdt, node_next, address_cells, size_cells,
> +                       scan_passthrough_prop);
> +        node_next = fdt_next_subnode(pfdt, node_next);
> +    }
> +
> +    return fdt_end_node(fdt);
> +}
> +
> +static int __init check_partial_fdt(void *pfdt, size_t size)
> +{
> +    int res;
> +
> +    if ( fdt_magic(pfdt) != FDT_MAGIC )
> +    {
> +        dprintk(XENLOG_ERR, "Partial FDT is not a valid Flat Device Tree");
> +        return -EINVAL;
> +    }
> +
> +    res = fdt_check_header(pfdt);
> +    if ( res )
> +    {
> +        dprintk(XENLOG_ERR, "Failed to check the partial FDT (%d)", res);
> +        return -EINVAL;
> +    }
> +
> +    if ( fdt_totalsize(pfdt) > size )
> +    {
> +        dprintk(XENLOG_ERR, "Partial FDT totalsize is too big");
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
> +static int __init domain_handle_dtb_bootmodule(struct domain *d,
> +                                               struct kernel_info *kinfo)
> +{
> +    void *pfdt;
> +    int res, node_next;
> +
> +    pfdt = ioremap_cache(kinfo->dtb_bootmodule->start,
> +                         kinfo->dtb_bootmodule->size);
> +    if ( pfdt == NULL )
> +        return -EFAULT;
> +
> +    res = check_partial_fdt(pfdt, kinfo->dtb_bootmodule->size);
> +    if ( res < 0 )
> +        goto out;
> +
> +    for ( node_next = fdt_first_subnode(pfdt, 0);
> +          node_next > 0;
> +          node_next = fdt_next_subnode(pfdt, node_next) )
> +    {
> +        const char *name = fdt_get_name(pfdt, node_next, NULL);
> +
> +        if ( name == NULL )
> +            continue;
> +
> +        /*
> +         * Only scan /gic /aliases /passthrough, ignore the rest.
> +         * They don't have to be parsed in order.
> +         *
> +         * Take the GIC phandle value from the special /gic node in the
> +         * DTB fragment.
> +         */
> +        if ( dt_node_cmp(name, "gic") == 0 )
> +        {
> +            kinfo->phandle_gic = fdt_get_phandle(pfdt, node_next);
> +            continue;
> +        }
> +
> +        if ( dt_node_cmp(name, "aliases") == 0 )
> +        {
> +            res = scan_pfdt_node(kinfo, pfdt, node_next,
> +                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
> +                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
> +                                 false);
> +            if ( res )
> +                goto out;
> +            continue;
> +        }
> +        if ( dt_node_cmp(name, "passthrough") == 0 )
> +        {
> +            res = scan_pfdt_node(kinfo, pfdt, node_next,
> +                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
> +                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
> +                                 true);
> +            if ( res )
> +                goto out;
> +            continue;
> +        }
> +    }
> +
> + out:
> +    iounmap(pfdt);
> +
> +    return res;
> +}
> +
> +/*
> + * The max size for DT is 2MB. However, the generated DT is small (not including
> + * domU passthrough DT nodes whose size we account separately), 4KB are enough
> + * for now, but we might have to increase it in the future.
> + */
> +#define DOMU_DTB_SIZE 4096
> +static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
> +{
> +    int addrcells, sizecells;
> +    int ret, fdt_size = DOMU_DTB_SIZE;
> +
> +    kinfo->phandle_gic = GUEST_PHANDLE_GIC;
> +    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
> +    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
> +
> +    addrcells = GUEST_ROOT_ADDRESS_CELLS;
> +    sizecells = GUEST_ROOT_SIZE_CELLS;
> +
> +    /* Account for domU passthrough DT size */
> +    if ( kinfo->dtb_bootmodule )
> +        fdt_size += kinfo->dtb_bootmodule->size;
> +
> +    /* Cap to max DT size if needed */
> +    fdt_size = min(fdt_size, SZ_2M);
> +
> +    kinfo->fdt = xmalloc_bytes(fdt_size);
> +    if ( kinfo->fdt == NULL )
> +        return -ENOMEM;
> +
> +    ret = fdt_create(kinfo->fdt, fdt_size);
> +    if ( ret < 0 )
> +        goto err;
> +
> +    ret = fdt_finish_reservemap(kinfo->fdt);
> +    if ( ret < 0 )
> +        goto err;
> +
> +    ret = fdt_begin_node(kinfo->fdt, "");
> +    if ( ret < 0 )
> +        goto err;
> +
> +    ret = fdt_property_cell(kinfo->fdt, "#address-cells", addrcells);
> +    if ( ret )
> +        goto err;
> +
> +    ret = fdt_property_cell(kinfo->fdt, "#size-cells", sizecells);
> +    if ( ret )
> +        goto err;
> +
> +    ret = make_chosen_node(kinfo);
> +    if ( ret )
> +        goto err;
> +
> +    ret = make_psci_node(kinfo->fdt);
> +    if ( ret )
> +        goto err;
> +
> +    ret = make_cpus_node(d, kinfo->fdt);
> +    if ( ret )
> +        goto err;
> +
> +    ret = make_memory_node(d, kinfo->fdt, addrcells, sizecells, &kinfo->mem);
> +    if ( ret )
> +        goto err;
> +
> +    ret = make_resv_memory_node(d, kinfo->fdt, addrcells, sizecells,
> +                                &kinfo->shm_mem);
> +    if ( ret )
> +        goto err;
> +
> +    /*
> +     * domain_handle_dtb_bootmodule has to be called before the rest of
> +     * the device tree is generated because it depends on the value of
> +     * the field phandle_gic.
> +     */
> +    if ( kinfo->dtb_bootmodule )
> +    {
> +        ret = domain_handle_dtb_bootmodule(d, kinfo);
> +        if ( ret )
> +            goto err;
> +    }
> +
> +    ret = make_gic_domU_node(kinfo);
> +    if ( ret )
> +        goto err;
> +
> +    ret = make_timer_node(kinfo);
> +    if ( ret )
> +        goto err;
> +
> +    if ( kinfo->vpl011 )
> +    {
> +        ret = -EINVAL;
> +#ifdef CONFIG_SBSA_VUART_CONSOLE
> +        ret = make_vpl011_uart_node(kinfo);
> +#endif
> +        if ( ret )
> +            goto err;
> +    }
> +
> +    if ( kinfo->dom0less_feature & DOM0LESS_ENHANCED_NO_XS )
> +    {
> +        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);
> +        if ( ret )
> +            goto err;
> +    }
> +
> +    ret = fdt_end_node(kinfo->fdt);
> +    if ( ret < 0 )
> +        goto err;
> +
> +    ret = fdt_finish(kinfo->fdt);
> +    if ( ret < 0 )
> +        goto err;
> +
> +    return 0;
> +
> +  err:
> +    printk("Device tree generation failed (%d).\n", ret);
> +    xfree(kinfo->fdt);
> +
> +    return -EINVAL;
> +}
> +
> +static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb,
> +                                             unsigned int smp_cpus)
> +{
> +    /*
> +     * Keep in sync with libxl__get_required_paging_memory().
> +     * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map,
> +     * plus 128 pages to cover extended regions.
> +     */
> +    unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128);
> +
> +    BUILD_BUG_ON(PAGE_SIZE != SZ_4K);
> +
> +    return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT);
> +}
> +
> +static int __init alloc_xenstore_evtchn(struct domain *d)
> +{
> +    evtchn_alloc_unbound_t alloc;
> +    int rc;
> +
> +    alloc.dom = d->domain_id;
> +    alloc.remote_dom = hardware_domain->domain_id;
> +    rc = evtchn_alloc_unbound(&alloc, 0);
> +    if ( rc )
> +    {
> +        printk("Failed allocating event channel for domain\n");
> +        return rc;
> +    }
> +
> +    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
> +
> +    return 0;
> +}
> +
> +static int __init construct_domU(struct domain *d,
> +                                 const struct dt_device_node *node)
> +{
> +    struct kernel_info kinfo = {};
> +    const char *dom0less_enhanced;
> +    int rc;
> +    u64 mem;
> +    u32 p2m_mem_mb;
> +    unsigned long p2m_pages;
> +
> +    rc = dt_property_read_u64(node, "memory", &mem);
> +    if ( !rc )
> +    {
> +        printk("Error building DomU: cannot read \"memory\" property\n");
> +        return -EINVAL;
> +    }
> +    kinfo.unassigned_mem = (paddr_t)mem * SZ_1K;
> +
> +    rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb);
> +    /* If xen,domain-p2m-mem-mb is not specified, use the default value. */
> +    p2m_pages = rc ?
> +                p2m_mem_mb << (20 - PAGE_SHIFT) :
> +                domain_p2m_pages(mem, d->max_vcpus);
> +
> +    spin_lock(&d->arch.paging.lock);
> +    rc = p2m_set_allocation(d, p2m_pages, NULL);
> +    spin_unlock(&d->arch.paging.lock);
> +    if ( rc != 0 )
> +        return rc;
> +
> +    printk("*** LOADING DOMU cpus=%u memory=%#"PRIx64"KB ***\n",
> +           d->max_vcpus, mem);
> +
> +    kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
> +
> +    rc = dt_property_read_string(node, "xen,enhanced", &dom0less_enhanced);
> +    if ( rc == -EILSEQ ||
> +         rc == -ENODATA ||
> +         (rc == 0 && !strcmp(dom0less_enhanced, "enabled")) )
> +    {
> +        if ( hardware_domain )
> +            kinfo.dom0less_feature = DOM0LESS_ENHANCED;
> +        else
> +            panic("At the moment, Xenstore support requires dom0 to be present\n");
> +    }
> +    else if ( rc == 0 && !strcmp(dom0less_enhanced, "no-xenstore") )
> +        kinfo.dom0less_feature = DOM0LESS_ENHANCED_NO_XS;
> +
> +    if ( vcpu_create(d, 0) == NULL )
> +        return -ENOMEM;
> +
> +    d->max_pages = ((paddr_t)mem * SZ_1K) >> PAGE_SHIFT;
> +
> +    kinfo.d = d;
> +
> +    rc = kernel_probe(&kinfo, node);
> +    if ( rc < 0 )
> +        return rc;
> +
> +#ifdef CONFIG_ARM_64
> +    /* type must be set before allocate memory */
> +    d->arch.type = kinfo.type;
> +#endif
> +    if ( !dt_find_property(node, "xen,static-mem", NULL) )
> +        allocate_memory(d, &kinfo);
> +    else if ( !is_domain_direct_mapped(d) )
> +        allocate_static_memory(d, &kinfo, node);
> +    else
> +        assign_static_memory_11(d, &kinfo, node);
> +
> +#ifdef CONFIG_STATIC_SHM
> +    rc = process_shm(d, &kinfo, node);
> +    if ( rc < 0 )
> +        return rc;
> +#endif
> +
> +    /*
> +     * Base address and irq number are needed when creating vpl011 device
> +     * tree node in prepare_dtb_domU, so initialization on related variables
> +     * shall be done first.
> +     */
> +    if ( kinfo.vpl011 )
> +    {
> +        rc = domain_vpl011_init(d, NULL);
> +        if ( rc < 0 )
> +            return rc;
> +    }
> +
> +    rc = prepare_dtb_domU(d, &kinfo);
> +    if ( rc < 0 )
> +        return rc;
> +
> +    rc = construct_domain(d, &kinfo);
> +    if ( rc < 0 )
> +        return rc;
> +
> +    if ( kinfo.dom0less_feature & DOM0LESS_XENSTORE )
> +    {
> +        ASSERT(hardware_domain);
> +        rc = alloc_xenstore_evtchn(d);
> +        if ( rc < 0 )
> +            return rc;
> +        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
> +    }
> +
> +    return rc;
> +}
> +
> +void __init create_domUs(void)
> +{
> +    struct dt_device_node *node;
> +    const struct dt_device_node *cpupool_node,
> +                                *chosen = dt_find_node_by_path("/chosen");
> +
> +    BUG_ON(chosen == NULL);
> +    dt_for_each_child_node(chosen, node)
> +    {
> +        struct domain *d;
> +        struct xen_domctl_createdomain d_cfg = {
> +            .arch.gic_version = XEN_DOMCTL_CONFIG_GIC_NATIVE,
> +            .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
> +            /*
> +             * The default of 1023 should be sufficient for guests because
> +             * on ARM we don't bind physical interrupts to event channels.
> +             * The only use of the evtchn port is inter-domain communications.
> +             * 1023 is also the default value used in libxl.
> +             */
> +            .max_evtchn_port = 1023,
> +            .max_grant_frames = -1,
> +            .max_maptrack_frames = -1,
> +            .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
> +        };
> +        unsigned int flags = 0U;
> +        uint32_t val;
> +        int rc;
> +
> +        if ( !dt_device_is_compatible(node, "xen,domain") )
> +            continue;
> +
> +        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
> +            panic("No more domain IDs available\n");
> +
> +        if ( dt_find_property(node, "xen,static-mem", NULL) )
> +            flags |= CDF_staticmem;
> +
> +        if ( dt_property_read_bool(node, "direct-map") )
> +        {
> +            if ( !(flags & CDF_staticmem) )
> +                panic("direct-map is not valid for domain %s without static allocation.\n",
> +                      dt_node_name(node));
> +
> +            flags |= CDF_directmap;
> +        }
> +
> +        if ( !dt_property_read_u32(node, "cpus", &d_cfg.max_vcpus) )
> +            panic("Missing property 'cpus' for domain %s\n",
> +                  dt_node_name(node));
> +
> +        if ( dt_find_compatible_node(node, NULL, "multiboot,device-tree") &&
> +             iommu_enabled )
> +            d_cfg.flags |= XEN_DOMCTL_CDF_iommu;
> +
> +        if ( !dt_property_read_u32(node, "nr_spis", &d_cfg.arch.nr_spis) )
> +        {
> +            int vpl011_virq = GUEST_VPL011_SPI;
> +
> +            d_cfg.arch.nr_spis = gic_number_lines() - 32;
> +
> +            /*
> +             * The VPL011 virq is GUEST_VPL011_SPI, unless direct-map is
> +             * set, in which case it'll match the hardware.
> +             *
> +             * Since the domain is not yet created, we can't use
> +             * d->arch.vpl011.irq. So the logic to find the vIRQ has to
> +             * be hardcoded.
> +             * The logic here shall be consistent with the one in
> +             * domain_vpl011_init().
> +             */
> +            if ( flags & CDF_directmap )
> +            {
> +                vpl011_virq = serial_irq(SERHND_DTUART);
> +                if ( vpl011_virq < 0 )
> +                    panic("Error getting IRQ number for this serial port %d\n",
> +                          SERHND_DTUART);
> +            }
> +
> +            /*
> +             * vpl011 uses one emulated SPI. If vpl011 is requested, make
> +             * sure that we allocate enough SPIs for it.
> +             */
> +            if ( dt_property_read_bool(node, "vpl011") )
> +                d_cfg.arch.nr_spis = MAX(d_cfg.arch.nr_spis,
> +                                         vpl011_virq - 32 + 1);
> +        }
> +
> +        /* Get the optional property domain-cpupool */
> +        cpupool_node = dt_parse_phandle(node, "domain-cpupool", 0);
> +        if ( cpupool_node )
> +        {
> +            int pool_id = btcpupools_get_domain_pool_id(cpupool_node);
> +            if ( pool_id < 0 )
> +                panic("Error getting cpupool id from domain-cpupool (%d)\n",
> +                      pool_id);
> +            d_cfg.cpupool_id = pool_id;
> +        }
> +
> +        if ( dt_property_read_u32(node, "max_grant_version", &val) )
> +            d_cfg.grant_opts = XEN_DOMCTL_GRANT_version(val);
> +
> +        if ( dt_property_read_u32(node, "max_grant_frames", &val) )
> +        {
> +            if ( val > INT32_MAX )
> +                panic("max_grant_frames (%"PRIu32") overflow\n", val);
> +            d_cfg.max_grant_frames = val;
> +        }
> +
> +        if ( dt_property_read_u32(node, "max_maptrack_frames", &val) )
> +        {
> +            if ( val > INT32_MAX )
> +                panic("max_maptrack_frames (%"PRIu32") overflow\n", val);
> +            d_cfg.max_maptrack_frames = val;
> +        }
> +
> +        if ( dt_get_property(node, "sve", &val) )
> +        {
> +#ifdef CONFIG_ARM64_SVE
> +            unsigned int sve_vl_bits;
> +            bool ret = false;
> +
> +            if ( !val )
> +            {
> +                /* Property found with no value, means max HW VL supported */
> +                ret = sve_domctl_vl_param(-1, &sve_vl_bits);
> +            }
> +            else
> +            {
> +                if ( dt_property_read_u32(node, "sve", &val) )
> +                    ret = sve_domctl_vl_param(val, &sve_vl_bits);
> +                else
> +                    panic("Error reading 'sve' property\n");
> +            }
> +
> +            if ( ret )
> +                d_cfg.arch.sve_vl = sve_encode_vl(sve_vl_bits);
> +            else
> +                panic("SVE vector length error\n");
> +#else
> +            panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n");
> +#endif
> +        }
> +
> +        /*
> +         * The variable max_init_domid is initialized with zero, so here it's
> +         * very important to use the pre-increment operator to call
> +         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
> +         */
> +        d = domain_create(++max_init_domid, &d_cfg, flags);
> +        if ( IS_ERR(d) )
> +            panic("Error creating domain %s (rc = %ld)\n",
> +                  dt_node_name(node), PTR_ERR(d));
> +
> +        d->is_console = true;
> +        dt_device_set_used_by(node, d->domain_id);
> +
> +        rc = construct_domU(d, node);
> +        if ( rc )
> +            panic("Could not set up domain %s (rc = %d)\n",
> +                  dt_node_name(node), rc);
> +    }
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index f69f238ccea4..a0d656b33629 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -28,6 +28,7 @@
>  #include <asm/setup.h>
>  #include <asm/arm64/sve.h>
>  #include <asm/cpufeature.h>
> +#include <asm/dom0less-build.h>
>  #include <asm/domain_build.h>
>  #include <xen/event.h>
>  
> @@ -117,7 +118,7 @@ struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
>      return vcpu_create(dom0, 0);
>  }
>  
> -static unsigned int __init get_allocation_size(paddr_t size)
> +unsigned int __init get_allocation_size(paddr_t size)
>  {
>      /*
>       * get_order_from_bytes returns the order greater than or equal to
> @@ -413,113 +414,6 @@ static void __init allocate_memory_11(struct domain *d,
>      }
>  }
>  
> -static bool __init allocate_bank_memory(struct domain *d,
> -                                        struct kernel_info *kinfo,
> -                                        gfn_t sgfn,
> -                                        paddr_t tot_size)
> -{
> -    int res;
> -    struct page_info *pg;
> -    struct membank *bank;
> -    unsigned int max_order = ~0;
> -
> -    /*
> -     * allocate_bank_memory can be called with a tot_size of zero for
> -     * the second memory bank. It is not an error and we can safely
> -     * avoid creating a zero-size memory bank.
> -     */
> -    if ( tot_size == 0 )
> -        return true;
> -
> -    bank = &kinfo->mem.bank[kinfo->mem.nr_banks];
> -    bank->start = gfn_to_gaddr(sgfn);
> -    bank->size = tot_size;
> -
> -    while ( tot_size > 0 )
> -    {
> -        unsigned int order = get_allocation_size(tot_size);
> -
> -        order = min(max_order, order);
> -
> -        pg = alloc_domheap_pages(d, order, 0);
> -        if ( !pg )
> -        {
> -            /*
> -             * If we can't allocate one page, then it is unlikely to
> -             * succeed in the next iteration. So bail out.
> -             */
> -            if ( !order )
> -                return false;
> -
> -            /*
> -             * If we can't allocate memory with order, then it is
> -             * unlikely to succeed in the next iteration.
> -             * Record the order - 1 to avoid re-trying.
> -             */
> -            max_order = order - 1;
> -            continue;
> -        }
> -
> -        res = guest_physmap_add_page(d, sgfn, page_to_mfn(pg), order);
> -        if ( res )
> -        {
> -            dprintk(XENLOG_ERR, "Failed map pages to DOMU: %d", res);
> -            return false;
> -        }
> -
> -        sgfn = gfn_add(sgfn, 1UL << order);
> -        tot_size -= (1ULL << (PAGE_SHIFT + order));
> -    }
> -
> -    kinfo->mem.nr_banks++;
> -    kinfo->unassigned_mem -= bank->size;
> -
> -    return true;
> -}
> -
> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo)
> -{
> -    unsigned int i;
> -    paddr_t bank_size;
> -
> -    printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n",
> -           /* Don't want format this as PRIpaddr (16 digit hex) */
> -           (unsigned long)(kinfo->unassigned_mem >> 20), d);
> -
> -    kinfo->mem.nr_banks = 0;
> -    bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem);
> -    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM0_BASE),
> -                               bank_size) )
> -        goto fail;
> -
> -    bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem);
> -    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM1_BASE),
> -                               bank_size) )
> -        goto fail;
> -
> -    if ( kinfo->unassigned_mem )
> -        goto fail;
> -
> -    for( i = 0; i < kinfo->mem.nr_banks; i++ )
> -    {
> -        printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
> -               d,
> -               i,
> -               kinfo->mem.bank[i].start,
> -               kinfo->mem.bank[i].start + kinfo->mem.bank[i].size,
> -               /* Don't want format this as PRIpaddr (16 digit hex) */
> -               (unsigned long)(kinfo->mem.bank[i].size >> 20));
> -    }
> -
> -    return;
> -
> -fail:
> -    panic("Failed to allocate requested domain memory."
> -          /* Don't want format this as PRIpaddr (16 digit hex) */
> -          " %ldKB unallocated. Fix the VMs configurations.\n",
> -          (unsigned long)kinfo->unassigned_mem >> 10);
> -}
> -
>  #ifdef CONFIG_STATIC_MEMORY
>  static bool __init append_static_memory_to_bank(struct domain *d,
>                                                  struct membank *bank,
> @@ -599,9 +493,8 @@ static int __init parse_static_mem_prop(const struct dt_device_node *node,
>  }
>  
>  /* Allocate memory from static memory as RAM for one specific domain d. */
> -static void __init allocate_static_memory(struct domain *d,
> -                                          struct kernel_info *kinfo,
> -                                          const struct dt_device_node *node)
> +void __init allocate_static_memory(struct domain *d, struct kernel_info *kinfo,
> +                                   const struct dt_device_node *node)
>  {
>      u32 addr_cells, size_cells, reg_cells;
>      unsigned int nr_banks, gbank, bank = 0;
> @@ -705,9 +598,8 @@ static void __init allocate_static_memory(struct domain *d,
>   * The static memory will be directly mapped in the guest(Guest Physical
>   * Address == Physical Address).
>   */
> -static void __init assign_static_memory_11(struct domain *d,
> -                                           struct kernel_info *kinfo,
> -                                           const struct dt_device_node *node)
> +void __init assign_static_memory_11(struct domain *d, struct kernel_info *kinfo,
> +                                    const struct dt_device_node *node)
>  {
>      u32 addr_cells, size_cells, reg_cells;
>      unsigned int nr_banks, bank = 0;
> @@ -941,8 +833,8 @@ static int __init append_shm_bank_to_domain(struct kernel_info *kinfo,
>      return 0;
>  }
>  
> -static int __init process_shm(struct domain *d, struct kernel_info *kinfo,
> -                              const struct dt_device_node *node)
> +int __init process_shm(struct domain *d, struct kernel_info *kinfo,
> +                       const struct dt_device_node *node)
>  {
>      struct dt_device_node *shm_node;
>  
> @@ -1047,20 +939,6 @@ static int __init process_shm(struct domain *d, struct kernel_info *kinfo,
>      return 0;
>  }
>  #endif /* CONFIG_STATIC_SHM */
> -#else
> -static void __init allocate_static_memory(struct domain *d,
> -                                          struct kernel_info *kinfo,
> -                                          const struct dt_device_node *node)
> -{
> -    ASSERT_UNREACHABLE();
> -}
> -
> -static void __init assign_static_memory_11(struct domain *d,
> -                                           struct kernel_info *kinfo,
> -                                           const struct dt_device_node *node)
> -{
> -    ASSERT_UNREACHABLE();
> -}
>  #endif
>  
>  /*
> @@ -1264,17 +1142,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
>      return 0;
>  }
>  
> -/*
> - * Helper to write an interrupts with the GIC format
> - * This code is assuming the irq is an PPI.
> - */
> -
> -typedef __be32 gic_interrupt_t[3];
> -
> -static void __init set_interrupt(gic_interrupt_t interrupt,
> -                                 unsigned int irq,
> -                                 unsigned int cpumask,
> -                                 unsigned int level)
> +void __init set_interrupt(gic_interrupt_t interrupt,
> +                          unsigned int irq,
> +                          unsigned int cpumask,
> +                          unsigned int level)
>  {
>      __be32 *cells = interrupt;
>      bool is_ppi = !!(irq < 32);
> @@ -1319,8 +1190,7 @@ static int __init fdt_property_interrupts(const struct kernel_info *kinfo,
>   * unit (which contains the physical address) with name to generate a
>   * node name.
>   */
> -static int __init domain_fdt_begin_node(void *fdt, const char *name,
> -                                        uint64_t unit)
> +int __init domain_fdt_begin_node(void *fdt, const char *name, uint64_t unit)
>  {
>      /*
>       * The size of the buffer to hold the longest possible string (i.e.
> @@ -1344,10 +1214,10 @@ static int __init domain_fdt_begin_node(void *fdt, const char *name,
>      return fdt_begin_node(fdt, buf);
>  }
>  
> -static int __init make_memory_node(const struct domain *d,
> -                                   void *fdt,
> -                                   int addrcells, int sizecells,
> -                                   struct meminfo *mem)
> +int __init make_memory_node(const struct domain *d,
> +                            void *fdt,
> +                            int addrcells, int sizecells,
> +                            struct meminfo *mem)
>  {
>      unsigned int i;
>      int res, reg_size = addrcells + sizecells;
> @@ -1483,10 +1353,10 @@ static int __init make_shm_memory_node(const struct domain *d,
>  }
>  #endif
>  
> -static int __init make_resv_memory_node(const struct domain *d,
> -                                        void *fdt,
> -                                        int addrcells, int sizecells,
> -                                        const struct meminfo *mem)
> +int __init make_resv_memory_node(const struct domain *d,
> +                                 void *fdt,
> +                                 int addrcells, int sizecells,
> +                                 const struct meminfo *mem)
>  {
>      int res = 0;
>      /* Placeholder for reserved-memory\0 */
> @@ -1819,9 +1689,9 @@ static int __init find_domU_holes(const struct kernel_info *kinfo,
>      return res;
>  }
>  
> -static int __init make_hypervisor_node(struct domain *d,
> -                                       const struct kernel_info *kinfo,
> -                                       int addrcells, int sizecells)
> +int __init make_hypervisor_node(struct domain *d,
> +                                const struct kernel_info *kinfo,
> +                                int addrcells, int sizecells)
>  {
>      const char compat[] =
>          "xen,xen-" XEN_VERSION_STRING "\0"
> @@ -1938,7 +1808,7 @@ static int __init make_hypervisor_node(struct domain *d,
>      return res;
>  }
>  
> -static int __init make_psci_node(void *fdt)
> +int __init make_psci_node(void *fdt)
>  {
>      int res;
>      const char compat[] =
> @@ -1974,7 +1844,7 @@ static int __init make_psci_node(void *fdt)
>      return res;
>  }
>  
> -static int __init make_cpus_node(const struct domain *d, void *fdt)
> +int __init make_cpus_node(const struct domain *d, void *fdt)
>  {
>      int res;
>      const struct dt_device_node *cpus = dt_find_node_by_path("/cpus");
> @@ -2172,7 +2042,7 @@ static int __init make_gic_node(const struct domain *d, void *fdt,
>      return res;
>  }
>  
> -static int __init make_timer_node(const struct kernel_info *kinfo)
> +int __init make_timer_node(const struct kernel_info *kinfo)
>  {
>      void *fdt = kinfo->fdt;
>      static const struct dt_device_match timer_ids[] __initconst =
> @@ -2490,743 +2360,134 @@ static int __init handle_node(struct domain *d, struct kernel_info *kinfo,
>      return res;
>  }
>  
> -#ifdef CONFIG_VGICV2
> -static int __init make_gicv2_domU_node(struct kernel_info *kinfo)
> +static int __init prepare_dtb_hwdom(struct domain *d, struct kernel_info *kinfo)
>  {
> -    void *fdt = kinfo->fdt;
> -    int res = 0;
> -    __be32 reg[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) * 2];
> -    __be32 *cells;
> -    const struct domain *d = kinfo->d;
> -
> -    res = domain_fdt_begin_node(fdt, "interrupt-controller",
> -                                vgic_dist_base(&d->arch.vgic));
> -    if ( res )
> -        return res;
> -
> -    res = fdt_property_cell(fdt, "#address-cells", 0);
> -    if ( res )
> -        return res;
> +    const p2m_type_t default_p2mt = p2m_mmio_direct_c;
> +    const void *fdt;
> +    int new_size;
> +    int ret;
>  
> -    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
> -    if ( res )
> -        return res;
> +    ASSERT(dt_host && (dt_host->sibling == NULL));
>  
> -    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
> -    if ( res )
> -        return res;
> +    kinfo->phandle_gic = dt_interrupt_controller->phandle;
> +    fdt = device_tree_flattened;
>  
> -    res = fdt_property_string(fdt, "compatible", "arm,gic-400");
> -    if ( res )
> -        return res;
> +    new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
> +    kinfo->fdt = xmalloc_bytes(new_size);
> +    if ( kinfo->fdt == NULL )
> +        return -ENOMEM;
>  
> -    cells = &reg[0];
> -    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> -                       vgic_dist_base(&d->arch.vgic), GUEST_GICD_SIZE);
> -    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> -                       vgic_cpu_base(&d->arch.vgic), GUEST_GICC_SIZE);
> +    ret = fdt_create(kinfo->fdt, new_size);
> +    if ( ret < 0 )
> +        goto err;
>  
> -    res = fdt_property(fdt, "reg", reg, sizeof(reg));
> -    if (res)
> -        return res;
> +    fdt_finish_reservemap(kinfo->fdt);
>  
> -    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
> -    if (res)
> -        return res;
> +    ret = handle_node(d, kinfo, dt_host, default_p2mt);
> +    if ( ret )
> +        goto err;
>  
> -    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
> -    if (res)
> -        return res;
> +    ret = fdt_finish(kinfo->fdt);
> +    if ( ret < 0 )
> +        goto err;
>  
> -    res = fdt_end_node(fdt);
> +    return 0;
>  
> -    return res;
> +  err:
> +    printk("Device tree generation failed (%d).\n", ret);
> +    xfree(kinfo->fdt);
> +    return -EINVAL;
>  }
> -#endif
>  
> -#ifdef CONFIG_GICV3
> -static int __init make_gicv3_domU_node(struct kernel_info *kinfo)
> +static void __init dtb_load(struct kernel_info *kinfo)
>  {
> -    void *fdt = kinfo->fdt;
> -    int res = 0;
> -    __be32 *reg, *cells;
> -    const struct domain *d = kinfo->d;
> -    unsigned int i, len = 0;
> -
> -    res = domain_fdt_begin_node(fdt, "interrupt-controller",
> -                                vgic_dist_base(&d->arch.vgic));
> -    if ( res )
> -        return res;
> -
> -    res = fdt_property_cell(fdt, "#address-cells", 0);
> -    if ( res )
> -        return res;
> -
> -    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
> -    if ( res )
> -        return res;
> -
> -    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
> -    if ( res )
> -        return res;
> -
> -    res = fdt_property_string(fdt, "compatible", "arm,gic-v3");
> -    if ( res )
> -        return res;
> -
> -    /* reg specifies all re-distributors and Distributor. */
> -    len = (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
> -          (d->arch.vgic.nr_regions + 1) * sizeof(__be32);
> -    reg = xmalloc_bytes(len);
> -    if ( reg == NULL )
> -        return -ENOMEM;
> -    cells = reg;
> -
> -    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> -                       vgic_dist_base(&d->arch.vgic), GUEST_GICV3_GICD_SIZE);
> -
> -    for ( i = 0; i < d->arch.vgic.nr_regions; i++ )
> -        dt_child_set_range(&cells,
> -                           GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> -                           d->arch.vgic.rdist_regions[i].base,
> -                           d->arch.vgic.rdist_regions[i].size);
> -
> -    res = fdt_property(fdt, "reg", reg, len);
> -    xfree(reg);
> -    if (res)
> -        return res;
> -
> -    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
> -    if (res)
> -        return res;
> -
> -    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
> -    if (res)
> -        return res;
> +    unsigned long left;
>  
> -    res = fdt_end_node(fdt);
> +    printk("Loading %pd DTB to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
> +           kinfo->d, kinfo->dtb_paddr,
> +           kinfo->dtb_paddr + fdt_totalsize(kinfo->fdt));
>  
> -    return res;
> -}
> -#endif
> +    left = copy_to_guest_phys_flush_dcache(kinfo->d, kinfo->dtb_paddr,
> +                                           kinfo->fdt,
> +                                           fdt_totalsize(kinfo->fdt));
>  
> -static int __init make_gic_domU_node(struct kernel_info *kinfo)
> -{
> -    switch ( kinfo->d->arch.vgic.version )
> -    {
> -#ifdef CONFIG_GICV3
> -    case GIC_V3:
> -        return make_gicv3_domU_node(kinfo);
> -#endif
> -#ifdef CONFIG_VGICV2
> -    case GIC_V2:
> -        return make_gicv2_domU_node(kinfo);
> -#endif
> -    default:
> -        panic("Unsupported GIC version\n");
> -    }
> +    if ( left != 0 )
> +        panic("Unable to copy the DTB to %pd memory (left = %lu bytes)\n",
> +              kinfo->d, left);
> +    xfree(kinfo->fdt);
>  }
>  
> -#ifdef CONFIG_SBSA_VUART_CONSOLE
> -static int __init make_vpl011_uart_node(struct kernel_info *kinfo)
> +static void __init initrd_load(struct kernel_info *kinfo)
>  {
> -    void *fdt = kinfo->fdt;
> +    const struct bootmodule *mod = kinfo->initrd_bootmodule;
> +    paddr_t load_addr = kinfo->initrd_paddr;
> +    paddr_t paddr, len;
> +    int node;
>      int res;
> -    gic_interrupt_t intr;
> -    __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
> -    __be32 *cells;
> -    struct domain *d = kinfo->d;
> -
> -    res = domain_fdt_begin_node(fdt, "sbsa-uart", d->arch.vpl011.base_addr);
> -    if ( res )
> -        return res;
> +    __be32 val[2];
> +    __be32 *cellp;
> +    void __iomem *initrd;
>  
> -    res = fdt_property_string(fdt, "compatible", "arm,sbsa-uart");
> -    if ( res )
> -        return res;
> +    if ( !mod || !mod->size )
> +        return;
>  
> -    cells = &reg[0];
> -    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS,
> -                       GUEST_ROOT_SIZE_CELLS, d->arch.vpl011.base_addr,
> -                       GUEST_PL011_SIZE);
> +    paddr = mod->start;
> +    len = mod->size;
>  
> -    res = fdt_property(fdt, "reg", reg, sizeof(reg));
> -    if ( res )
> -        return res;
> +    printk("Loading %pd initrd from %"PRIpaddr" to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
> +           kinfo->d, paddr, load_addr, load_addr + len);
>  
> -    set_interrupt(intr, d->arch.vpl011.virq, 0xf, DT_IRQ_TYPE_LEVEL_HIGH);
> +    /* Fix up linux,initrd-start and linux,initrd-end in /chosen */
> +    node = fdt_path_offset(kinfo->fdt, "/chosen");
> +    if ( node < 0 )
> +        panic("Cannot find the /chosen node\n");
>  
> -    res = fdt_property(fdt, "interrupts", intr, sizeof (intr));
> +    cellp = (__be32 *)val;
> +    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr);
> +    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-start",
> +                              val, sizeof(val));
>      if ( res )
> -        return res;
> +        panic("Cannot fix up \"linux,initrd-start\" property\n");
>  
> -    res = fdt_property_cell(fdt, "interrupt-parent",
> -                            kinfo->phandle_gic);
> +    cellp = (__be32 *)val;
> +    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr + len);
> +    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-end",
> +                              val, sizeof(val));
>      if ( res )
> -        return res;
> +        panic("Cannot fix up \"linux,initrd-end\" property\n");
>  
> -    /* Use a default baud rate of 115200. */
> -    fdt_property_u32(fdt, "current-speed", 115200);
> +    initrd = ioremap_wc(paddr, len);
> +    if ( !initrd )
> +        panic("Unable to map the hwdom initrd\n");
>  
> -    res = fdt_end_node(fdt);
> -    if ( res )
> -        return res;
> +    res = copy_to_guest_phys_flush_dcache(kinfo->d, load_addr,
> +                                          initrd, len);
> +    if ( res != 0 )
> +        panic("Unable to copy the initrd in the hwdom memory\n");
>  
> -    return 0;
> +    iounmap(initrd);
>  }
> -#endif
>  
>  /*
> - * Scan device tree properties for passthrough specific information.
> - * Returns < 0 on error
> - *         0 on success
> + * Allocate the event channel PPIs and setup the HVM_PARAM_CALLBACK_IRQ.
> + * The allocated IRQ will be found in d->arch.evtchn_irq.
> + *
> + * Note that this should only be called once all PPIs used by the
> + * hardware domain have been registered.
>   */
> -static int __init handle_passthrough_prop(struct kernel_info *kinfo,
> -                                          const struct fdt_property *xen_reg,
> -                                          const struct fdt_property *xen_path,
> -                                          bool xen_force,
> -                                          uint32_t address_cells, uint32_t size_cells)
> +void __init evtchn_allocate(struct domain *d)
>  {
> -    const __be32 *cell;
> -    unsigned int i, len;
> -    struct dt_device_node *node;
>      int res;
> -    paddr_t mstart, size, gstart;
> -
> -    /* xen,reg specifies where to map the MMIO region */
> -    cell = (const __be32 *)xen_reg->data;
> -    len = fdt32_to_cpu(xen_reg->len) / ((address_cells * 2 + size_cells) *
> -                                        sizeof(uint32_t));
> +    u64 val;
>  
> -    for ( i = 0; i < len; i++ )
> -    {
> -        device_tree_get_reg(&cell, address_cells, size_cells,
> -                            &mstart, &size);
> -        gstart = dt_next_cell(address_cells, &cell);
> +    res = vgic_allocate_ppi(d);
> +    if ( res < 0 )
> +        panic("Unable to allocate a PPI for the event channel interrupt\n");
>  
> -        if ( gstart & ~PAGE_MASK || mstart & ~PAGE_MASK || size & ~PAGE_MASK )
> -        {
> -            printk(XENLOG_ERR
> -                    "DomU passthrough config has not page aligned addresses/sizes\n");
> -            return -EINVAL;
> -        }
> +    d->arch.evtchn_irq = res;
>  
> -        res = iomem_permit_access(kinfo->d, paddr_to_pfn(mstart),
> -                                  paddr_to_pfn(PAGE_ALIGN(mstart + size - 1)));
> -        if ( res )
> -        {
> -            printk(XENLOG_ERR "Unable to permit to dom%d access to"
> -                   " 0x%"PRIpaddr" - 0x%"PRIpaddr"\n",
> -                   kinfo->d->domain_id,
> -                   mstart & PAGE_MASK, PAGE_ALIGN(mstart + size) - 1);
> -            return res;
> -        }
> -
> -        res = map_regions_p2mt(kinfo->d,
> -                               gaddr_to_gfn(gstart),
> -                               PFN_DOWN(size),
> -                               maddr_to_mfn(mstart),
> -                               p2m_mmio_direct_dev);
> -        if ( res < 0 )
> -        {
> -            printk(XENLOG_ERR
> -                   "Failed to map %"PRIpaddr" to the guest at%"PRIpaddr"\n",
> -                   mstart, gstart);
> -            return -EFAULT;
> -        }
> -    }
> -
> -    /*
> -     * If xen_force, we let the user assign a MMIO region with no
> -     * associated path.
> -     */
> -    if ( xen_path == NULL )
> -        return xen_force ? 0 : -EINVAL;
> -
> -    /*
> -     * xen,path specifies the corresponding node in the host DT.
> -     * Both interrupt mappings and IOMMU settings are based on it,
> -     * as they are done based on the corresponding host DT node.
> -     */
> -    node = dt_find_node_by_path(xen_path->data);
> -    if ( node == NULL )
> -    {
> -        printk(XENLOG_ERR "Couldn't find node %s in host_dt!\n",
> -               (char *)xen_path->data);
> -        return -EINVAL;
> -    }
> -
> -    res = map_device_irqs_to_domain(kinfo->d, node, true, NULL);
> -    if ( res < 0 )
> -        return res;
> -
> -    res = iommu_add_dt_device(node);
> -    if ( res < 0 )
> -        return res;
> -
> -    /* If xen_force, we allow assignment of devices without IOMMU protection. */
> -    if ( xen_force && !dt_device_is_protected(node) )
> -        return 0;
> -
> -    return iommu_assign_dt_device(kinfo->d, node);
> -}
> -
> -static int __init handle_prop_pfdt(struct kernel_info *kinfo,
> -                                   const void *pfdt, int nodeoff,
> -                                   uint32_t address_cells, uint32_t size_cells,
> -                                   bool scan_passthrough_prop)
> -{
> -    void *fdt = kinfo->fdt;
> -    int propoff, nameoff, res;
> -    const struct fdt_property *prop, *xen_reg = NULL, *xen_path = NULL;
> -    const char *name;
> -    bool found, xen_force = false;
> -
> -    for ( propoff = fdt_first_property_offset(pfdt, nodeoff);
> -          propoff >= 0;
> -          propoff = fdt_next_property_offset(pfdt, propoff) )
> -    {
> -        if ( !(prop = fdt_get_property_by_offset(pfdt, propoff, NULL)) )
> -            return -FDT_ERR_INTERNAL;
> -
> -        found = false;
> -        nameoff = fdt32_to_cpu(prop->nameoff);
> -        name = fdt_string(pfdt, nameoff);
> -
> -        if ( scan_passthrough_prop )
> -        {
> -            if ( dt_prop_cmp("xen,reg", name) == 0 )
> -            {
> -                xen_reg = prop;
> -                found = true;
> -            }
> -            else if ( dt_prop_cmp("xen,path", name) == 0 )
> -            {
> -                xen_path = prop;
> -                found = true;
> -            }
> -            else if ( dt_prop_cmp("xen,force-assign-without-iommu",
> -                                  name) == 0 )
> -            {
> -                xen_force = true;
> -                found = true;
> -            }
> -        }
> -
> -        /*
> -         * Copy properties other than the ones above: xen,reg, xen,path,
> -         * and xen,force-assign-without-iommu.
> -         */
> -        if ( !found )
> -        {
> -            res = fdt_property(fdt, name, prop->data, fdt32_to_cpu(prop->len));
> -            if ( res )
> -                return res;
> -        }
> -    }
> -
> -    /*
> -     * Only handle passthrough properties if both xen,reg and xen,path
> -     * are present, or if xen,force-assign-without-iommu is specified.
> -     */
> -    if ( xen_reg != NULL && (xen_path != NULL || xen_force) )
> -    {
> -        res = handle_passthrough_prop(kinfo, xen_reg, xen_path, xen_force,
> -                                      address_cells, size_cells);
> -        if ( res < 0 )
> -        {
> -            printk(XENLOG_ERR "Failed to assign device to %pd\n", kinfo->d);
> -            return res;
> -        }
> -    }
> -    else if ( (xen_path && !xen_reg) || (xen_reg && !xen_path && !xen_force) )
> -    {
> -        printk(XENLOG_ERR "xen,reg or xen,path missing for %pd\n",
> -               kinfo->d);
> -        return -EINVAL;
> -    }
> -
> -    /* FDT_ERR_NOTFOUND => There is no more properties for this node */
> -    return ( propoff != -FDT_ERR_NOTFOUND ) ? propoff : 0;
> -}
> -
> -static int __init scan_pfdt_node(struct kernel_info *kinfo, const void *pfdt,
> -                                 int nodeoff,
> -                                 uint32_t address_cells, uint32_t size_cells,
> -                                 bool scan_passthrough_prop)
> -{
> -    int rc = 0;
> -    void *fdt = kinfo->fdt;
> -    int node_next;
> -
> -    rc = fdt_begin_node(fdt, fdt_get_name(pfdt, nodeoff, NULL));
> -    if ( rc )
> -        return rc;
> -
> -    rc = handle_prop_pfdt(kinfo, pfdt, nodeoff, address_cells, size_cells,
> -                          scan_passthrough_prop);
> -    if ( rc )
> -        return rc;
> -
> -    address_cells = device_tree_get_u32(pfdt, nodeoff, "#address-cells",
> -                                        DT_ROOT_NODE_ADDR_CELLS_DEFAULT);
> -    size_cells = device_tree_get_u32(pfdt, nodeoff, "#size-cells",
> -                                     DT_ROOT_NODE_SIZE_CELLS_DEFAULT);
> -
> -    node_next = fdt_first_subnode(pfdt, nodeoff);
> -    while ( node_next > 0 )
> -    {
> -        scan_pfdt_node(kinfo, pfdt, node_next, address_cells, size_cells,
> -                       scan_passthrough_prop);
> -        node_next = fdt_next_subnode(pfdt, node_next);
> -    }
> -
> -    return fdt_end_node(fdt);
> -}
> -
> -static int __init check_partial_fdt(void *pfdt, size_t size)
> -{
> -    int res;
> -
> -    if ( fdt_magic(pfdt) != FDT_MAGIC )
> -    {
> -        dprintk(XENLOG_ERR, "Partial FDT is not a valid Flat Device Tree");
> -        return -EINVAL;
> -    }
> -
> -    res = fdt_check_header(pfdt);
> -    if ( res )
> -    {
> -        dprintk(XENLOG_ERR, "Failed to check the partial FDT (%d)", res);
> -        return -EINVAL;
> -    }
> -
> -    if ( fdt_totalsize(pfdt) > size )
> -    {
> -        dprintk(XENLOG_ERR, "Partial FDT totalsize is too big");
> -        return -EINVAL;
> -    }
> -
> -    return 0;
> -}
> -
> -static int __init domain_handle_dtb_bootmodule(struct domain *d,
> -                                               struct kernel_info *kinfo)
> -{
> -    void *pfdt;
> -    int res, node_next;
> -
> -    pfdt = ioremap_cache(kinfo->dtb_bootmodule->start,
> -                         kinfo->dtb_bootmodule->size);
> -    if ( pfdt == NULL )
> -        return -EFAULT;
> -
> -    res = check_partial_fdt(pfdt, kinfo->dtb_bootmodule->size);
> -    if ( res < 0 )
> -        goto out;
> -
> -    for ( node_next = fdt_first_subnode(pfdt, 0); 
> -          node_next > 0;
> -          node_next = fdt_next_subnode(pfdt, node_next) )
> -    {
> -        const char *name = fdt_get_name(pfdt, node_next, NULL);
> -
> -        if ( name == NULL )
> -            continue;
> -
> -        /*
> -         * Only scan /gic /aliases /passthrough, ignore the rest.
> -         * They don't have to be parsed in order.
> -         *
> -         * Take the GIC phandle value from the special /gic node in the
> -         * DTB fragment.
> -         */
> -        if ( dt_node_cmp(name, "gic") == 0 )
> -        {
> -            kinfo->phandle_gic = fdt_get_phandle(pfdt, node_next);
> -            continue;
> -        }
> -
> -        if ( dt_node_cmp(name, "aliases") == 0 )
> -        {
> -            res = scan_pfdt_node(kinfo, pfdt, node_next,
> -                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
> -                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
> -                                 false);
> -            if ( res )
> -                goto out;
> -            continue;
> -        }
> -        if ( dt_node_cmp(name, "passthrough") == 0 )
> -        {
> -            res = scan_pfdt_node(kinfo, pfdt, node_next,
> -                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
> -                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
> -                                 true);
> -            if ( res )
> -                goto out;
> -            continue;
> -        }
> -    }
> -
> - out:
> -    iounmap(pfdt);
> -
> -    return res;
> -}
> -
> -/*
> - * The max size for DT is 2MB. However, the generated DT is small (not including
> - * domU passthrough DT nodes whose size we account separately), 4KB are enough
> - * for now, but we might have to increase it in the future.
> - */
> -#define DOMU_DTB_SIZE 4096
> -static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
> -{
> -    int addrcells, sizecells;
> -    int ret, fdt_size = DOMU_DTB_SIZE;
> -
> -    kinfo->phandle_gic = GUEST_PHANDLE_GIC;
> -    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
> -    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
> -
> -    addrcells = GUEST_ROOT_ADDRESS_CELLS;
> -    sizecells = GUEST_ROOT_SIZE_CELLS;
> -
> -    /* Account for domU passthrough DT size */
> -    if ( kinfo->dtb_bootmodule )
> -        fdt_size += kinfo->dtb_bootmodule->size;
> -
> -    /* Cap to max DT size if needed */
> -    fdt_size = min(fdt_size, SZ_2M);
> -
> -    kinfo->fdt = xmalloc_bytes(fdt_size);
> -    if ( kinfo->fdt == NULL )
> -        return -ENOMEM;
> -
> -    ret = fdt_create(kinfo->fdt, fdt_size);
> -    if ( ret < 0 )
> -        goto err;
> -
> -    ret = fdt_finish_reservemap(kinfo->fdt);
> -    if ( ret < 0 )
> -        goto err;
> -
> -    ret = fdt_begin_node(kinfo->fdt, "");
> -    if ( ret < 0 )
> -        goto err;
> -
> -    ret = fdt_property_cell(kinfo->fdt, "#address-cells", addrcells);
> -    if ( ret )
> -        goto err;
> -
> -    ret = fdt_property_cell(kinfo->fdt, "#size-cells", sizecells);
> -    if ( ret )
> -        goto err;
> -
> -    ret = make_chosen_node(kinfo);
> -    if ( ret )
> -        goto err;
> -
> -    ret = make_psci_node(kinfo->fdt);
> -    if ( ret )
> -        goto err;
> -
> -    ret = make_cpus_node(d, kinfo->fdt);
> -    if ( ret )
> -        goto err;
> -
> -    ret = make_memory_node(d, kinfo->fdt, addrcells, sizecells, &kinfo->mem);
> -    if ( ret )
> -        goto err;
> -
> -    ret = make_resv_memory_node(d, kinfo->fdt, addrcells, sizecells,
> -                                &kinfo->shm_mem);
> -    if ( ret )
> -        goto err;
> -
> -    /*
> -     * domain_handle_dtb_bootmodule has to be called before the rest of
> -     * the device tree is generated because it depends on the value of
> -     * the field phandle_gic.
> -     */
> -    if ( kinfo->dtb_bootmodule )
> -    {
> -        ret = domain_handle_dtb_bootmodule(d, kinfo);
> -        if ( ret )
> -            goto err;
> -    }
> -
> -    ret = make_gic_domU_node(kinfo);
> -    if ( ret )
> -        goto err;
> -
> -    ret = make_timer_node(kinfo);
> -    if ( ret )
> -        goto err;
> -
> -    if ( kinfo->vpl011 )
> -    {
> -        ret = -EINVAL;
> -#ifdef CONFIG_SBSA_VUART_CONSOLE
> -        ret = make_vpl011_uart_node(kinfo);
> -#endif
> -        if ( ret )
> -            goto err;
> -    }
> -
> -    if ( kinfo->dom0less_feature & DOM0LESS_ENHANCED_NO_XS )
> -    {
> -        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);
> -        if ( ret )
> -            goto err;
> -    }
> -
> -    ret = fdt_end_node(kinfo->fdt);
> -    if ( ret < 0 )
> -        goto err;
> -
> -    ret = fdt_finish(kinfo->fdt);
> -    if ( ret < 0 )
> -        goto err;
> -
> -    return 0;
> -
> -  err:
> -    printk("Device tree generation failed (%d).\n", ret);
> -    xfree(kinfo->fdt);
> -
> -    return -EINVAL;
> -}
> -
> -static int __init prepare_dtb_hwdom(struct domain *d, struct kernel_info *kinfo)
> -{
> -    const p2m_type_t default_p2mt = p2m_mmio_direct_c;
> -    const void *fdt;
> -    int new_size;
> -    int ret;
> -
> -    ASSERT(dt_host && (dt_host->sibling == NULL));
> -
> -    kinfo->phandle_gic = dt_interrupt_controller->phandle;
> -    fdt = device_tree_flattened;
> -
> -    new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
> -    kinfo->fdt = xmalloc_bytes(new_size);
> -    if ( kinfo->fdt == NULL )
> -        return -ENOMEM;
> -
> -    ret = fdt_create(kinfo->fdt, new_size);
> -    if ( ret < 0 )
> -        goto err;
> -
> -    fdt_finish_reservemap(kinfo->fdt);
> -
> -    ret = handle_node(d, kinfo, dt_host, default_p2mt);
> -    if ( ret )
> -        goto err;
> -
> -    ret = fdt_finish(kinfo->fdt);
> -    if ( ret < 0 )
> -        goto err;
> -
> -    return 0;
> -
> -  err:
> -    printk("Device tree generation failed (%d).\n", ret);
> -    xfree(kinfo->fdt);
> -    return -EINVAL;
> -}
> -
> -static void __init dtb_load(struct kernel_info *kinfo)
> -{
> -    unsigned long left;
> -
> -    printk("Loading %pd DTB to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
> -           kinfo->d, kinfo->dtb_paddr,
> -           kinfo->dtb_paddr + fdt_totalsize(kinfo->fdt));
> -
> -    left = copy_to_guest_phys_flush_dcache(kinfo->d, kinfo->dtb_paddr,
> -                                           kinfo->fdt,
> -                                           fdt_totalsize(kinfo->fdt));
> -
> -    if ( left != 0 )
> -        panic("Unable to copy the DTB to %pd memory (left = %lu bytes)\n",
> -              kinfo->d, left);
> -    xfree(kinfo->fdt);
> -}
> -
> -static void __init initrd_load(struct kernel_info *kinfo)
> -{
> -    const struct bootmodule *mod = kinfo->initrd_bootmodule;
> -    paddr_t load_addr = kinfo->initrd_paddr;
> -    paddr_t paddr, len;
> -    int node;
> -    int res;
> -    __be32 val[2];
> -    __be32 *cellp;
> -    void __iomem *initrd;
> -
> -    if ( !mod || !mod->size )
> -        return;
> -
> -    paddr = mod->start;
> -    len = mod->size;
> -
> -    printk("Loading %pd initrd from %"PRIpaddr" to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
> -           kinfo->d, paddr, load_addr, load_addr + len);
> -
> -    /* Fix up linux,initrd-start and linux,initrd-end in /chosen */
> -    node = fdt_path_offset(kinfo->fdt, "/chosen");
> -    if ( node < 0 )
> -        panic("Cannot find the /chosen node\n");
> -
> -    cellp = (__be32 *)val;
> -    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr);
> -    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-start",
> -                              val, sizeof(val));
> -    if ( res )
> -        panic("Cannot fix up \"linux,initrd-start\" property\n");
> -
> -    cellp = (__be32 *)val;
> -    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr + len);
> -    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-end",
> -                              val, sizeof(val));
> -    if ( res )
> -        panic("Cannot fix up \"linux,initrd-end\" property\n");
> -
> -    initrd = ioremap_wc(paddr, len);
> -    if ( !initrd )
> -        panic("Unable to map the hwdom initrd\n");
> -
> -    res = copy_to_guest_phys_flush_dcache(kinfo->d, load_addr,
> -                                          initrd, len);
> -    if ( res != 0 )
> -        panic("Unable to copy the initrd in the hwdom memory\n");
> -
> -    iounmap(initrd);
> -}
> -
> -/*
> - * Allocate the event channel PPIs and setup the HVM_PARAM_CALLBACK_IRQ.
> - * The allocated IRQ will be found in d->arch.evtchn_irq.
> - *
> - * Note that this should only be called once all PPIs used by the
> - * hardware domain have been registered.
> - */
> -void __init evtchn_allocate(struct domain *d)
> -{
> -    int res;
> -    u64 val;
> -
> -    res = vgic_allocate_ppi(d);
> -    if ( res < 0 )
> -        panic("Unable to allocate a PPI for the event channel interrupt\n");
> -
> -    d->arch.evtchn_irq = res;
> -
> -    printk("Allocating PPI %u for event channel interrupt\n",
> -           d->arch.evtchn_irq);
> +    printk("Allocating PPI %u for event channel interrupt\n",
> +           d->arch.evtchn_irq);
>  
>      /* Set the value of domain param HVM_PARAM_CALLBACK_IRQ */
>      val = MASK_INSR(HVM_PARAM_CALLBACK_TYPE_PPI,
> @@ -3409,22 +2670,7 @@ static void __init find_gnttab_region(struct domain *d,
>             kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size);
>  }
>  
> -static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb,
> -                                             unsigned int smp_cpus)
> -{
> -    /*
> -     * Keep in sync with libxl__get_required_paging_memory().
> -     * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map,
> -     * plus 128 pages to cover extended regions.
> -     */
> -    unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128);
> -
> -    BUILD_BUG_ON(PAGE_SIZE != SZ_4K);
> -
> -    return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT);
> -}
> -
> -static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
> +int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
>  {
>      unsigned int i;
>      struct vcpu *v = d->vcpu[0];
> @@ -3515,296 +2761,6 @@ static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
>      return 0;
>  }
>  
> -static int __init alloc_xenstore_evtchn(struct domain *d)
> -{
> -    evtchn_alloc_unbound_t alloc;
> -    int rc;
> -
> -    alloc.dom = d->domain_id;
> -    alloc.remote_dom = hardware_domain->domain_id;
> -    rc = evtchn_alloc_unbound(&alloc, 0);
> -    if ( rc )
> -    {
> -        printk("Failed allocating event channel for domain\n");
> -        return rc;
> -    }
> -
> -    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
> -
> -    return 0;
> -}
> -
> -static int __init construct_domU(struct domain *d,
> -                                 const struct dt_device_node *node)
> -{
> -    struct kernel_info kinfo = {};
> -    const char *dom0less_enhanced;
> -    int rc;
> -    u64 mem;
> -    u32 p2m_mem_mb;
> -    unsigned long p2m_pages;
> -
> -    rc = dt_property_read_u64(node, "memory", &mem);
> -    if ( !rc )
> -    {
> -        printk("Error building DomU: cannot read \"memory\" property\n");
> -        return -EINVAL;
> -    }
> -    kinfo.unassigned_mem = (paddr_t)mem * SZ_1K;
> -
> -    rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb);
> -    /* If xen,domain-p2m-mem-mb is not specified, use the default value. */
> -    p2m_pages = rc ?
> -                p2m_mem_mb << (20 - PAGE_SHIFT) :
> -                domain_p2m_pages(mem, d->max_vcpus);
> -
> -    spin_lock(&d->arch.paging.lock);
> -    rc = p2m_set_allocation(d, p2m_pages, NULL);
> -    spin_unlock(&d->arch.paging.lock);
> -    if ( rc != 0 )
> -        return rc;
> -
> -    printk("*** LOADING DOMU cpus=%u memory=%#"PRIx64"KB ***\n",
> -           d->max_vcpus, mem);
> -
> -    kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
> -
> -    rc = dt_property_read_string(node, "xen,enhanced", &dom0less_enhanced);
> -    if ( rc == -EILSEQ ||
> -         rc == -ENODATA ||
> -         (rc == 0 && !strcmp(dom0less_enhanced, "enabled")) )
> -    {
> -        if ( hardware_domain )
> -            kinfo.dom0less_feature = DOM0LESS_ENHANCED;
> -        else
> -            panic("At the moment, Xenstore support requires dom0 to be present\n");
> -    }
> -    else if ( rc == 0 && !strcmp(dom0less_enhanced, "no-xenstore") )
> -        kinfo.dom0less_feature = DOM0LESS_ENHANCED_NO_XS;
> -
> -    if ( vcpu_create(d, 0) == NULL )
> -        return -ENOMEM;
> -
> -    d->max_pages = ((paddr_t)mem * SZ_1K) >> PAGE_SHIFT;
> -
> -    kinfo.d = d;
> -
> -    rc = kernel_probe(&kinfo, node);
> -    if ( rc < 0 )
> -        return rc;
> -
> -#ifdef CONFIG_ARM_64
> -    /* type must be set before allocate memory */
> -    d->arch.type = kinfo.type;
> -#endif
> -    if ( !dt_find_property(node, "xen,static-mem", NULL) )
> -        allocate_memory(d, &kinfo);
> -    else if ( !is_domain_direct_mapped(d) )
> -        allocate_static_memory(d, &kinfo, node);
> -    else
> -        assign_static_memory_11(d, &kinfo, node);
> -
> -#ifdef CONFIG_STATIC_SHM
> -    rc = process_shm(d, &kinfo, node);
> -    if ( rc < 0 )
> -        return rc;
> -#endif
> -
> -    /*
> -     * Base address and irq number are needed when creating vpl011 device
> -     * tree node in prepare_dtb_domU, so initialization on related variables
> -     * shall be done first.
> -     */
> -    if ( kinfo.vpl011 )
> -    {
> -        rc = domain_vpl011_init(d, NULL);
> -        if ( rc < 0 )
> -            return rc;
> -    }
> -
> -    rc = prepare_dtb_domU(d, &kinfo);
> -    if ( rc < 0 )
> -        return rc;
> -
> -    rc = construct_domain(d, &kinfo);
> -    if ( rc < 0 )
> -        return rc;
> -
> -    if ( kinfo.dom0less_feature & DOM0LESS_XENSTORE )
> -    {
> -        ASSERT(hardware_domain);
> -        rc = alloc_xenstore_evtchn(d);
> -        if ( rc < 0 )
> -            return rc;
> -        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
> -    }
> -
> -    return rc;
> -}
> -
> -void __init create_domUs(void)
> -{
> -    struct dt_device_node *node;
> -    const struct dt_device_node *cpupool_node,
> -                                *chosen = dt_find_node_by_path("/chosen");
> -
> -    BUG_ON(chosen == NULL);
> -    dt_for_each_child_node(chosen, node)
> -    {
> -        struct domain *d;
> -        struct xen_domctl_createdomain d_cfg = {
> -            .arch.gic_version = XEN_DOMCTL_CONFIG_GIC_NATIVE,
> -            .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
> -            /*
> -             * The default of 1023 should be sufficient for guests because
> -             * on ARM we don't bind physical interrupts to event channels.
> -             * The only use of the evtchn port is inter-domain communications.
> -             * 1023 is also the default value used in libxl.
> -             */
> -            .max_evtchn_port = 1023,
> -            .max_grant_frames = -1,
> -            .max_maptrack_frames = -1,
> -            .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
> -        };
> -        unsigned int flags = 0U;
> -        uint32_t val;
> -        int rc;
> -
> -        if ( !dt_device_is_compatible(node, "xen,domain") )
> -            continue;
> -
> -        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
> -            panic("No more domain IDs available\n");
> -
> -        if ( dt_find_property(node, "xen,static-mem", NULL) )
> -            flags |= CDF_staticmem;
> -
> -        if ( dt_property_read_bool(node, "direct-map") )
> -        {
> -            if ( !(flags & CDF_staticmem) )
> -                panic("direct-map is not valid for domain %s without static allocation.\n",
> -                      dt_node_name(node));
> -
> -            flags |= CDF_directmap;
> -        }
> -
> -        if ( !dt_property_read_u32(node, "cpus", &d_cfg.max_vcpus) )
> -            panic("Missing property 'cpus' for domain %s\n",
> -                  dt_node_name(node));
> -
> -        if ( dt_find_compatible_node(node, NULL, "multiboot,device-tree") &&
> -             iommu_enabled )
> -            d_cfg.flags |= XEN_DOMCTL_CDF_iommu;
> -
> -        if ( !dt_property_read_u32(node, "nr_spis", &d_cfg.arch.nr_spis) )
> -        {
> -            int vpl011_virq = GUEST_VPL011_SPI;
> -
> -            d_cfg.arch.nr_spis = gic_number_lines() - 32;
> -
> -            /*
> -             * The VPL011 virq is GUEST_VPL011_SPI, unless direct-map is
> -             * set, in which case it'll match the hardware.
> -             *
> -             * Since the domain is not yet created, we can't use
> -             * d->arch.vpl011.irq. So the logic to find the vIRQ has to
> -             * be hardcoded.
> -             * The logic here shall be consistent with the one in
> -             * domain_vpl011_init().
> -             */
> -            if ( flags & CDF_directmap )
> -            {
> -                vpl011_virq = serial_irq(SERHND_DTUART);
> -                if ( vpl011_virq < 0 )
> -                    panic("Error getting IRQ number for this serial port %d\n",
> -                          SERHND_DTUART);
> -            }
> -
> -            /*
> -             * vpl011 uses one emulated SPI. If vpl011 is requested, make
> -             * sure that we allocate enough SPIs for it.
> -             */
> -            if ( dt_property_read_bool(node, "vpl011") )
> -                d_cfg.arch.nr_spis = MAX(d_cfg.arch.nr_spis,
> -                                         vpl011_virq - 32 + 1);
> -        }
> -
> -        /* Get the optional property domain-cpupool */
> -        cpupool_node = dt_parse_phandle(node, "domain-cpupool", 0);
> -        if ( cpupool_node )
> -        {
> -            int pool_id = btcpupools_get_domain_pool_id(cpupool_node);
> -            if ( pool_id < 0 )
> -                panic("Error getting cpupool id from domain-cpupool (%d)\n",
> -                      pool_id);
> -            d_cfg.cpupool_id = pool_id;
> -        }
> -
> -        if ( dt_property_read_u32(node, "max_grant_version", &val) )
> -            d_cfg.grant_opts = XEN_DOMCTL_GRANT_version(val);
> -
> -        if ( dt_property_read_u32(node, "max_grant_frames", &val) )
> -        {
> -            if ( val > INT32_MAX )
> -                panic("max_grant_frames (%"PRIu32") overflow\n", val);
> -            d_cfg.max_grant_frames = val;
> -        }
> -
> -        if ( dt_property_read_u32(node, "max_maptrack_frames", &val) )
> -        {
> -            if ( val > INT32_MAX )
> -                panic("max_maptrack_frames (%"PRIu32") overflow\n", val);
> -            d_cfg.max_maptrack_frames = val;
> -        }
> -
> -        if ( dt_get_property(node, "sve", &val) )
> -        {
> -#ifdef CONFIG_ARM64_SVE
> -            unsigned int sve_vl_bits;
> -            bool ret = false;
> -
> -            if ( !val )
> -            {
> -                /* Property found with no value, means max HW VL supported */
> -                ret = sve_domctl_vl_param(-1, &sve_vl_bits);
> -            }
> -            else
> -            {
> -                if ( dt_property_read_u32(node, "sve", &val) )
> -                    ret = sve_domctl_vl_param(val, &sve_vl_bits);
> -                else
> -                    panic("Error reading 'sve' property\n");
> -            }
> -
> -            if ( ret )
> -                d_cfg.arch.sve_vl = sve_encode_vl(sve_vl_bits);
> -            else
> -                panic("SVE vector length error\n");
> -#else
> -            panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n");
> -#endif
> -        }
> -
> -        /*
> -         * The variable max_init_domid is initialized with zero, so here it's
> -         * very important to use the pre-increment operator to call
> -         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
> -         */
> -        d = domain_create(++max_init_domid, &d_cfg, flags);
> -        if ( IS_ERR(d) )
> -            panic("Error creating domain %s (rc = %ld)\n",
> -                  dt_node_name(node), PTR_ERR(d));
> -
> -        d->is_console = true;
> -        dt_device_set_used_by(node, d->domain_id);
> -
> -        rc = construct_domU(d, node);
> -        if ( rc )
> -            panic("Could not set up domain %s (rc = %d)\n",
> -                  dt_node_name(node), rc);
> -    }
> -}
> -
>  static int __init construct_dom0(struct domain *d)
>  {
>      struct kernel_info kinfo = {};
> diff --git a/xen/arch/arm/include/asm/dom0less-build.h b/xen/arch/arm/include/asm/dom0less-build.h
> new file mode 100644
> index 000000000000..0d28fa1bee46
> --- /dev/null
> +++ b/xen/arch/arm/include/asm/dom0less-build.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * xen/arch/arm/include/asm/dom0less-build.h
> + *
> + * Copyright (C) 2023 Arm Ltd.
> + */
> +
> +#ifndef __ARM_DOM0LESS_BUILD_H_
> +#define __ARM_DOM0LESS_BUILD_H_
> +
> +#include <asm/kernel.h>
> +
> +void create_domUs(void);
> +bool is_dom0less_mode(void);
> +
> +#endif  /* __ARM_DOM0LESS_BUILD_H_ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:b
> + */
> diff --git a/xen/arch/arm/include/asm/domain_build.h b/xen/arch/arm/include/asm/domain_build.h
> index b9329c9ee032..d95cf376048f 100644
> --- a/xen/arch/arm/include/asm/domain_build.h
> +++ b/xen/arch/arm/include/asm/domain_build.h
> @@ -4,9 +4,45 @@
>  #include <xen/sched.h>
>  #include <asm/kernel.h>
>  
> +typedef __be32 gic_interrupt_t[3];
> +
> +int construct_domain(struct domain *d, struct kernel_info *kinfo);
> +int domain_fdt_begin_node(void *fdt, const char *name, uint64_t unit);
> +int make_resv_memory_node(const struct domain *d, void *fdt, int addrcells,
> +                          int sizecells, const struct meminfo *mem);
>  int make_chosen_node(const struct kernel_info *kinfo);
> +int make_cpus_node(const struct domain *d, void *fdt);
> +int make_hypervisor_node(struct domain *d, const struct kernel_info *kinfo,
> +                         int addrcells, int sizecells);
> +int make_memory_node(const struct domain *d, void *fdt, int addrcells,
> +                     int sizecells, struct meminfo *mem);
> +int make_psci_node(void *fdt);
> +int make_timer_node(const struct kernel_info *kinfo);
>  void evtchn_allocate(struct domain *d);
>  
> +unsigned int get_allocation_size(paddr_t size);
> +
> +/*
> + * handle_device_interrupts retrieves the interrupts configuration from
> + * a device tree node and maps those interrupts to the target domain.
> + *
> + * Returns:
> + *   < 0 error
> + *   0   success
> + */
> +int handle_device_interrupts(struct domain *d, struct dt_device_node *dev,
> +                             bool need_mapping);
> +
> +/*
> + * Helper to write an interrupts with the GIC format
> + * This code is assuming the irq is an PPI.
> + */
> +void set_interrupt(gic_interrupt_t interrupt, unsigned int irq,
> +                   unsigned int cpumask, unsigned int level);
> +
> +int process_shm(struct domain *d, struct kernel_info *kinfo,
> +                const struct dt_device_node *node);
> +
>  #ifndef CONFIG_ACPI
>  static inline int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
>  {
> @@ -17,6 +53,28 @@ static inline int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
>  #else
>  int prepare_acpi(struct domain *d, struct kernel_info *kinfo);
>  #endif
> +
> +#ifdef CONFIG_STATIC_MEMORY
> +void allocate_static_memory(struct domain *d, struct kernel_info *kinfo,
> +                            const struct dt_device_node *node);
> +void assign_static_memory_11(struct domain *d, struct kernel_info *kinfo,
> +                             const struct dt_device_node *node);
> +#else
> +static inline void allocate_static_memory(struct domain *d,
> +                                          struct kernel_info *kinfo,
> +                                          const struct dt_device_node *node)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
> +static inline void assign_static_memory_11(struct domain *d,
> +                                           struct kernel_info *kinfo,
> +                                           const struct dt_device_node *node)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +#endif
> +
>  #endif
>  
>  /*
> diff --git a/xen/arch/arm/include/asm/setup.h b/xen/arch/arm/include/asm/setup.h
> index b8866c20f462..cd4c7815790b 100644
> --- a/xen/arch/arm/include/asm/setup.h
> +++ b/xen/arch/arm/include/asm/setup.h
> @@ -135,7 +135,6 @@ void acpi_create_efi_mmap_table(struct domain *d,
>  
>  int acpi_make_efi_nodes(void *fdt, struct membank tbl_add[]);
>  
> -void create_domUs(void);
>  void create_dom0(void);
>  void alloc_static_evtchn(void);
>  
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index db748839d383..db0299d540dc 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -34,6 +34,7 @@
>  #include <xen/warning.h>
>  #include <xen/hypercall.h>
>  #include <asm/alternative.h>
> +#include <asm/dom0less-build.h>
>  #include <asm/page.h>
>  #include <asm/current.h>
>  #include <asm/setup.h>
> @@ -1042,38 +1043,6 @@ static void __init setup_mm(void)
>  }
>  #endif
>  
> -static bool __init is_dom0less_mode(void)
> -{
> -    struct bootmodules *mods = &bootinfo.modules;
> -    struct bootmodule *mod;
> -    unsigned int i;
> -    bool dom0found = false;
> -    bool domUfound = false;
> -
> -    /* Look into the bootmodules */
> -    for ( i = 0 ; i < mods->nr_mods ; i++ )
> -    {
> -        mod = &mods->module[i];
> -        /* Find if dom0 and domU kernels are present */
> -        if ( mod->kind == BOOTMOD_KERNEL )
> -        {
> -            if ( mod->domU == false )
> -            {
> -                dom0found = true;
> -                break;
> -            }
> -            else
> -                domUfound = true;
> -        }
> -    }
> -
> -    /*
> -     * If there is no dom0 kernel but at least one domU, then we are in
> -     * dom0less mode
> -     */
> -    return ( !dom0found && domUfound );
> -}
> -
>  size_t __read_mostly dcache_line_bytes;
>  
>  /* C entry point for boot CPU */
> -- 
> 2.34.1
>
Luca Fancellu Sept. 29, 2023, 7:15 a.m. UTC | #2
Hi Stefano,

> On 29 Sep 2023, at 01:33, Stefano Stabellini <sstabellini@kernel.org> wrote:
> 
> On Wed, 27 Sep 2023, Luca Fancellu wrote:
>> Currently the dom0less feature code is mostly inside domain_build.c
>> and setup.c, it is a feature that may not be useful to everyone so
>> put the code in a different compilation module in order to make it
>> easier to disable the feature in the future.
>> 
>> Move gic_interrupt_t in domain_build.h to use it with the function
>> declaration, move its comment above the declaration.
>> 
>> The following functions are now visible externally from domain_build
>> because they are used also from the dom0less-build module:
>> - get_allocation_size
>> - set_interrupt
>> - domain_fdt_begin_node
>> - make_memory_node
>> - make_resv_memory_node
>> - make_hypervisor_node
>> - make_psci_node
>> - make_cpus_node
>> - make_timer_node
>> - handle_device_interrupts
>> - construct_domain
>> - process_shm
>> 
>> The functions allocate_static_memory and assign_static_memory_11
>> are now externally visible, so put their declarations into
>> domain_build.h and move the #else and stub definition in the header
>> as well.
>> 
>> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
>> externally visible.
>> 
>> Where spotted, fix code style issues.
>> 
>> No functional change is intended.
>> 
>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> 
> This is great! A couple of questions.
> 
> Why was allocate_static_memory not moved to dom0less-build.c ?

My aim is to decouple the features, so in patch 4 we move (just once as Julien suggested)
the static memory code on a module on its own, because we can have a guest booted with
dom0less feature but having it with static memory is optional.

> 
> Would it make sense to also move construct_dom0 to dom0less-build.c
> given the similarities with construct_domU? I am not sure about this.
> 

We can’t do that because the final goal of this serie is to have a Kconfig disabling dom0less,
so in that case we will end up removing from the compilation also construct_dom0.

Cheers,
Luca
Stefano Stabellini Sept. 29, 2023, 7:48 p.m. UTC | #3
On Fri, 29 Sep 2023, Luca Fancellu wrote:
> > On 29 Sep 2023, at 01:33, Stefano Stabellini <sstabellini@kernel.org> wrote:
> > 
> > On Wed, 27 Sep 2023, Luca Fancellu wrote:
> >> Currently the dom0less feature code is mostly inside domain_build.c
> >> and setup.c, it is a feature that may not be useful to everyone so
> >> put the code in a different compilation module in order to make it
> >> easier to disable the feature in the future.
> >> 
> >> Move gic_interrupt_t in domain_build.h to use it with the function
> >> declaration, move its comment above the declaration.
> >> 
> >> The following functions are now visible externally from domain_build
> >> because they are used also from the dom0less-build module:
> >> - get_allocation_size
> >> - set_interrupt
> >> - domain_fdt_begin_node
> >> - make_memory_node
> >> - make_resv_memory_node
> >> - make_hypervisor_node
> >> - make_psci_node
> >> - make_cpus_node
> >> - make_timer_node
> >> - handle_device_interrupts
> >> - construct_domain
> >> - process_shm
> >> 
> >> The functions allocate_static_memory and assign_static_memory_11
> >> are now externally visible, so put their declarations into
> >> domain_build.h and move the #else and stub definition in the header
> >> as well.
> >> 
> >> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
> >> externally visible.
> >> 
> >> Where spotted, fix code style issues.
> >> 
> >> No functional change is intended.
> >> 
> >> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> > 
> > This is great! A couple of questions.
> > 
> > Why was allocate_static_memory not moved to dom0less-build.c ?
> 
> My aim is to decouple the features, so in patch 4 we move (just once as Julien suggested)
> the static memory code on a module on its own, because we can have a guest booted with
> dom0less feature but having it with static memory is optional.

OK


> > Would it make sense to also move construct_dom0 to dom0less-build.c
> > given the similarities with construct_domU? I am not sure about this.
> > 
> 
> We can’t do that because the final goal of this serie is to have a Kconfig disabling dom0less,
> so in that case we will end up removing from the compilation also construct_dom0.

OK. Probably we can't do much better than this.


One more question on the code movement, and I would also like Julien and
Bertrand to express their opinions on this.

Given that code movement is painful from a git history perspective, and
given that we have to move dom0less code to xen/common anyway to make
it available to RISC-V and also x86, could we do it in one shot here?

I am not asking to refactor the code to make it buildable on RISC-V or
X86. I am only asking to move the code to xen/common/dom0less (or
xen/common/hyperlaunch or xen/common/domain-builder as Danien and
Christopher called it) the same way you are doing it as part of this
series. It shouldn't require additional work on your side I hope.
Julien Grall Oct. 2, 2023, 8:26 a.m. UTC | #4
Hi Stefano,

On 29/09/2023 20:48, Stefano Stabellini wrote:
> On Fri, 29 Sep 2023, Luca Fancellu wrote:
>>> On 29 Sep 2023, at 01:33, Stefano Stabellini <sstabellini@kernel.org> wrote:
>>>
>>> On Wed, 27 Sep 2023, Luca Fancellu wrote:
>>>> Currently the dom0less feature code is mostly inside domain_build.c
>>>> and setup.c, it is a feature that may not be useful to everyone so
>>>> put the code in a different compilation module in order to make it
>>>> easier to disable the feature in the future.
>>>>
>>>> Move gic_interrupt_t in domain_build.h to use it with the function
>>>> declaration, move its comment above the declaration.
>>>>
>>>> The following functions are now visible externally from domain_build
>>>> because they are used also from the dom0less-build module:
>>>> - get_allocation_size
>>>> - set_interrupt
>>>> - domain_fdt_begin_node
>>>> - make_memory_node
>>>> - make_resv_memory_node
>>>> - make_hypervisor_node
>>>> - make_psci_node
>>>> - make_cpus_node
>>>> - make_timer_node
>>>> - handle_device_interrupts
>>>> - construct_domain
>>>> - process_shm
>>>>
>>>> The functions allocate_static_memory and assign_static_memory_11
>>>> are now externally visible, so put their declarations into
>>>> domain_build.h and move the #else and stub definition in the header
>>>> as well.
>>>>
>>>> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
>>>> externally visible.
>>>>
>>>> Where spotted, fix code style issues.
>>>>
>>>> No functional change is intended.
>>>>
>>>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>>>
>>> This is great! A couple of questions.
>>>
>>> Why was allocate_static_memory not moved to dom0less-build.c ?
>>
>> My aim is to decouple the features, so in patch 4 we move (just once as Julien suggested)
>> the static memory code on a module on its own, because we can have a guest booted with
>> dom0less feature but having it with static memory is optional.
> 
> OK
> 
> 
>>> Would it make sense to also move construct_dom0 to dom0less-build.c
>>> given the similarities with construct_domU? I am not sure about this.
>>>
>>
>> We can’t do that because the final goal of this serie is to have a Kconfig disabling dom0less,
>> so in that case we will end up removing from the compilation also construct_dom0.
> 
> OK. Probably we can't do much better than this.
> 
> 
> One more question on the code movement, and I would also like Julien and
> Bertrand to express their opinions on this.
> 
> Given that code movement is painful from a git history perspective, and
> given that we have to move dom0less code to xen/common anyway to make
> it available to RISC-V and also x86, could we do it in one shot here?

Looking at the name of the functions, I would expect that we would need 
another code movement in the future to move back Arm specific function 
under arch/arm/. So we would end up with two code movement as well.

I would prefer if we wait until RISC-V/x86 needs it so we don't 
unnecessarily move Arm specific code in common/.

Cheers,
Bertrand Marquis Oct. 2, 2023, 2:28 p.m. UTC | #5
Hi,

> On 2 Oct 2023, at 10:26, Julien Grall <julien@xen.org> wrote:
> 
> Hi Stefano,
> 
> On 29/09/2023 20:48, Stefano Stabellini wrote:
>> On Fri, 29 Sep 2023, Luca Fancellu wrote:
>>>> On 29 Sep 2023, at 01:33, Stefano Stabellini <sstabellini@kernel.org> wrote:
>>>> 
>>>> On Wed, 27 Sep 2023, Luca Fancellu wrote:
>>>>> Currently the dom0less feature code is mostly inside domain_build.c
>>>>> and setup.c, it is a feature that may not be useful to everyone so
>>>>> put the code in a different compilation module in order to make it
>>>>> easier to disable the feature in the future.
>>>>> 
>>>>> Move gic_interrupt_t in domain_build.h to use it with the function
>>>>> declaration, move its comment above the declaration.
>>>>> 
>>>>> The following functions are now visible externally from domain_build
>>>>> because they are used also from the dom0less-build module:
>>>>> - get_allocation_size
>>>>> - set_interrupt
>>>>> - domain_fdt_begin_node
>>>>> - make_memory_node
>>>>> - make_resv_memory_node
>>>>> - make_hypervisor_node
>>>>> - make_psci_node
>>>>> - make_cpus_node
>>>>> - make_timer_node
>>>>> - handle_device_interrupts
>>>>> - construct_domain
>>>>> - process_shm
>>>>> 
>>>>> The functions allocate_static_memory and assign_static_memory_11
>>>>> are now externally visible, so put their declarations into
>>>>> domain_build.h and move the #else and stub definition in the header
>>>>> as well.
>>>>> 
>>>>> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
>>>>> externally visible.
>>>>> 
>>>>> Where spotted, fix code style issues.
>>>>> 
>>>>> No functional change is intended.
>>>>> 
>>>>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>>>> 
>>>> This is great! A couple of questions.
>>>> 
>>>> Why was allocate_static_memory not moved to dom0less-build.c ?
>>> 
>>> My aim is to decouple the features, so in patch 4 we move (just once as Julien suggested)
>>> the static memory code on a module on its own, because we can have a guest booted with
>>> dom0less feature but having it with static memory is optional.
>> OK
>>>> Would it make sense to also move construct_dom0 to dom0less-build.c
>>>> given the similarities with construct_domU? I am not sure about this.
>>>> 
>>> 
>>> We can’t do that because the final goal of this serie is to have a Kconfig disabling dom0less,
>>> so in that case we will end up removing from the compilation also construct_dom0.
>> OK. Probably we can't do much better than this.
>> One more question on the code movement, and I would also like Julien and
>> Bertrand to express their opinions on this.
>> Given that code movement is painful from a git history perspective, and
>> given that we have to move dom0less code to xen/common anyway to make
>> it available to RISC-V and also x86, could we do it in one shot here?
> 
> Looking at the name of the functions, I would expect that we would need another code movement in the future to move back Arm specific function under arch/arm/. So we would end up with two code movement as well.
> 
> I would prefer if we wait until RISC-V/x86 needs it so we don't unnecessarily move Arm specific code in common/.

I agree with Julien here.
Moving the code now will mean moving part of it back in arm in the future once we have a second user of this.
I would rather wait for the need to come so that we do this cleanly.

Also using hyperlaunch name now would be weird as there was no agreement on the naming (as far as I know) so far.

Cheers
Bertrand

> 
> Cheers,
> 
> -- 
> Julien Grall
Stefano Stabellini Oct. 2, 2023, 10:27 p.m. UTC | #6
On Mon, 2 Oct 2023, Bertrand Marquis wrote:
> > On 2 Oct 2023, at 10:26, Julien Grall <julien@xen.org> wrote:
> > On 29/09/2023 20:48, Stefano Stabellini wrote:
> >> On Fri, 29 Sep 2023, Luca Fancellu wrote:
> >>>> On 29 Sep 2023, at 01:33, Stefano Stabellini <sstabellini@kernel.org> wrote:
> >>>> 
> >>>> On Wed, 27 Sep 2023, Luca Fancellu wrote:
> >>>>> Currently the dom0less feature code is mostly inside domain_build.c
> >>>>> and setup.c, it is a feature that may not be useful to everyone so
> >>>>> put the code in a different compilation module in order to make it
> >>>>> easier to disable the feature in the future.
> >>>>> 
> >>>>> Move gic_interrupt_t in domain_build.h to use it with the function
> >>>>> declaration, move its comment above the declaration.
> >>>>> 
> >>>>> The following functions are now visible externally from domain_build
> >>>>> because they are used also from the dom0less-build module:
> >>>>> - get_allocation_size
> >>>>> - set_interrupt
> >>>>> - domain_fdt_begin_node
> >>>>> - make_memory_node
> >>>>> - make_resv_memory_node
> >>>>> - make_hypervisor_node
> >>>>> - make_psci_node
> >>>>> - make_cpus_node
> >>>>> - make_timer_node
> >>>>> - handle_device_interrupts
> >>>>> - construct_domain
> >>>>> - process_shm
> >>>>> 
> >>>>> The functions allocate_static_memory and assign_static_memory_11
> >>>>> are now externally visible, so put their declarations into
> >>>>> domain_build.h and move the #else and stub definition in the header
> >>>>> as well.
> >>>>> 
> >>>>> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
> >>>>> externally visible.
> >>>>> 
> >>>>> Where spotted, fix code style issues.
> >>>>> 
> >>>>> No functional change is intended.
> >>>>> 
> >>>>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> >>>> 
> >>>> This is great! A couple of questions.
> >>>> 
> >>>> Why was allocate_static_memory not moved to dom0less-build.c ?
> >>> 
> >>> My aim is to decouple the features, so in patch 4 we move (just once as Julien suggested)
> >>> the static memory code on a module on its own, because we can have a guest booted with
> >>> dom0less feature but having it with static memory is optional.
> >> OK
> >>>> Would it make sense to also move construct_dom0 to dom0less-build.c
> >>>> given the similarities with construct_domU? I am not sure about this.
> >>>> 
> >>> 
> >>> We can’t do that because the final goal of this serie is to have a Kconfig disabling dom0less,
> >>> so in that case we will end up removing from the compilation also construct_dom0.
> >> OK. Probably we can't do much better than this.
> >> One more question on the code movement, and I would also like Julien and
> >> Bertrand to express their opinions on this.
> >> Given that code movement is painful from a git history perspective, and
> >> given that we have to move dom0less code to xen/common anyway to make
> >> it available to RISC-V and also x86, could we do it in one shot here?
> > 
> > Looking at the name of the functions, I would expect that we would need another code movement in the future to move back Arm specific function under arch/arm/. So we would end up with two code movement as well.
> > 
> > I would prefer if we wait until RISC-V/x86 needs it so we don't unnecessarily move Arm specific code in common/.
> 
> I agree with Julien here.
> Moving the code now will mean moving part of it back in arm in the future once we have a second user of this.
> I would rather wait for the need to come so that we do this cleanly.
> 
> Also using hyperlaunch name now would be weird as there was no agreement on the naming (as far as I know) so far.

RISC-V is already using dom0less code, however in a downstream
repository. To make progress faster the code was copied (not shared)
from arch/arm to arch/riscv. More details on the Xen community call this
week. https://gitlab.com/xen-project/people/olkur/xen/-/blob/riscv_aia_support/xen/arch/riscv/domain_build.c?ref_type=heads

Hyperlaunch also needs dom0less code to be made common to make progress:
https://marc.info/?l=xen-devel&m=169154172700539

So I think that there is an immediate RISC-V and X86 need.

But the point about "moving the code now will mean moving part of it
back in arm in the future" is valid. How do we move forward?

I don't think we want to block Luca's progress to wait for more
plumbings done on x86 or RISC-V. Also we don't want to scope creep
Luca's series too much.

But I think the goal should be to move dom0less code to xen/common as
soon as possible and make it arch neutral. How do we get there?
Luca Fancellu Oct. 3, 2023, 7:44 a.m. UTC | #7
>>>> Given that code movement is painful from a git history perspective, and
>>>> given that we have to move dom0less code to xen/common anyway to make
>>>> it available to RISC-V and also x86, could we do it in one shot here?
>>> 
>>> Looking at the name of the functions, I would expect that we would need another code movement in the future to move back Arm specific function under arch/arm/. So we would end up with two code movement as well.
>>> 
>>> I would prefer if we wait until RISC-V/x86 needs it so we don't unnecessarily move Arm specific code in common/.
>> 
>> I agree with Julien here.
>> Moving the code now will mean moving part of it back in arm in the future once we have a second user of this.
>> I would rather wait for the need to come so that we do this cleanly.
>> 
>> Also using hyperlaunch name now would be weird as there was no agreement on the naming (as far as I know) so far.
> 
> RISC-V is already using dom0less code, however in a downstream
> repository. To make progress faster the code was copied (not shared)
> from arch/arm to arch/riscv. More details on the Xen community call this
> week. https://gitlab.com/xen-project/people/olkur/xen/-/blob/riscv_aia_support/xen/arch/riscv/domain_build.c?ref_type=heads
> 
> Hyperlaunch also needs dom0less code to be made common to make progress:
> https://marc.info/?l=xen-devel&m=169154172700539
> 
> So I think that there is an immediate RISC-V and X86 need.
> 
> But the point about "moving the code now will mean moving part of it
> back in arm in the future" is valid. How do we move forward?
> 
> I don't think we want to block Luca's progress to wait for more
> plumbings done on x86 or RISC-V. Also we don't want to scope creep
> Luca's series too much.
> 
> But I think the goal should be to move dom0less code to xen/common as
> soon as possible and make it arch neutral. How do we get there?

So here is why I felt painful doing now a move to the common code, but maybe you (maintainers) can give me some
feedbacks.

I see that the functions that might be put in common are these, some of them however have arm specific code in them:

is_dom0less_mode
allocate_bank_memory
allocate_memory
handle_passthrough_prop
handle_prop_pfdt
scan_pfdt_node
check_partial_fdt
domain_p2m_pages
alloc_xenstore_evtchn
domain_handle_dtb_bootmodule (contains reference to the gic)
prepare_dtb_domU (have reference to psci, gic, vpl011)
construct_domU (have reference to vpl011, static shared memory)
create_domUs(have reference to vpl011, sve)

Here the functions that can stay in arm code:

make_gicv2_domU_node
make_gicv3_domU_node
make_gic_domU_node
make_vpl011_uart_node


Given that the status after the move to common of the above functions is not very clean, I’ve decided to don’t do that,
however if you are fine with it, I can do the modification and who is going to work further on the subject can consolidate
and make them build for other architecture.

Cheers,
Luca
Michal Orzel Oct. 3, 2023, 8:03 a.m. UTC | #8
Hi Luca,

On 03/10/2023 09:44, Luca Fancellu wrote:
> 
> 
>>>>> Given that code movement is painful from a git history perspective, and
>>>>> given that we have to move dom0less code to xen/common anyway to make
>>>>> it available to RISC-V and also x86, could we do it in one shot here?
>>>>
>>>> Looking at the name of the functions, I would expect that we would need another code movement in the future to move back Arm specific function under arch/arm/. So we would end up with two code movement as well.
>>>>
>>>> I would prefer if we wait until RISC-V/x86 needs it so we don't unnecessarily move Arm specific code in common/.
>>>
>>> I agree with Julien here.
>>> Moving the code now will mean moving part of it back in arm in the future once we have a second user of this.
>>> I would rather wait for the need to come so that we do this cleanly.
>>>
>>> Also using hyperlaunch name now would be weird as there was no agreement on the naming (as far as I know) so far.
>>
>> RISC-V is already using dom0less code, however in a downstream
>> repository. To make progress faster the code was copied (not shared)
>> from arch/arm to arch/riscv. More details on the Xen community call this
>> week. https://gitlab.com/xen-project/people/olkur/xen/-/blob/riscv_aia_support/xen/arch/riscv/domain_build.c?ref_type=heads
>>
>> Hyperlaunch also needs dom0less code to be made common to make progress:
>> https://marc.info/?l=xen-devel&m=169154172700539
>>
>> So I think that there is an immediate RISC-V and X86 need.
>>
>> But the point about "moving the code now will mean moving part of it
>> back in arm in the future" is valid. How do we move forward?
>>
>> I don't think we want to block Luca's progress to wait for more
>> plumbings done on x86 or RISC-V. Also we don't want to scope creep
>> Luca's series too much.
>>
>> But I think the goal should be to move dom0less code to xen/common as
>> soon as possible and make it arch neutral. How do we get there?
> 
> So here is why I felt painful doing now a move to the common code, but maybe you (maintainers) can give me some
> feedbacks.
> 
> I see that the functions that might be put in common are these, some of them however have arm specific code in them:
> 
> is_dom0less_mode
> allocate_bank_memory
> allocate_memory
> handle_passthrough_prop
> handle_prop_pfdt
> scan_pfdt_node
> check_partial_fdt
> domain_p2m_pages
> alloc_xenstore_evtchn
> domain_handle_dtb_bootmodule (contains reference to the gic)
> prepare_dtb_domU (have reference to psci, gic, vpl011)
> construct_domU (have reference to vpl011, static shared memory)
> create_domUs(have reference to vpl011, sve)
> 
> Here the functions that can stay in arm code:
> 
> make_gicv2_domU_node
> make_gicv3_domU_node
> make_gic_domU_node
> make_vpl011_uart_node
> 
> 
> Given that the status after the move to common of the above functions is not very clean, I’ve decided to don’t do that,
> however if you are fine with it, I can do the modification and who is going to work further on the subject can consolidate
> and make them build for other architecture.
> 
Another option would be to hold off for a while until work on hyperlaunch/RISCV dom0less starts to better understand the needs,
concepts and to avoid multiple code movement which results in a horrible history. I know this is not nice but I can tell you that
I had to stop working on some features like FLASK with dom0less, static domids for dom0less domUs, because according to the hyperlaunch design,
this will need to be common. With hyperlaunch, everything starts with the domain configuration that is supposed to be arch neutral, so
until this is done, it's difficult to do anything in this area.

~Michal
Stefano Stabellini Oct. 3, 2023, 7:52 p.m. UTC | #9
On Tue, 3 Oct 2023, Michal Orzel wrote:
> On 03/10/2023 09:44, Luca Fancellu wrote:
> >>>>> Given that code movement is painful from a git history perspective, and
> >>>>> given that we have to move dom0less code to xen/common anyway to make
> >>>>> it available to RISC-V and also x86, could we do it in one shot here?
> >>>>
> >>>> Looking at the name of the functions, I would expect that we would need another code movement in the future to move back Arm specific function under arch/arm/. So we would end up with two code movement as well.
> >>>>
> >>>> I would prefer if we wait until RISC-V/x86 needs it so we don't unnecessarily move Arm specific code in common/.
> >>>
> >>> I agree with Julien here.
> >>> Moving the code now will mean moving part of it back in arm in the future once we have a second user of this.
> >>> I would rather wait for the need to come so that we do this cleanly.
> >>>
> >>> Also using hyperlaunch name now would be weird as there was no agreement on the naming (as far as I know) so far.
> >>
> >> RISC-V is already using dom0less code, however in a downstream
> >> repository. To make progress faster the code was copied (not shared)
> >> from arch/arm to arch/riscv. More details on the Xen community call this
> >> week. https://gitlab.com/xen-project/people/olkur/xen/-/blob/riscv_aia_support/xen/arch/riscv/domain_build.c?ref_type=heads
> >>
> >> Hyperlaunch also needs dom0less code to be made common to make progress:
> >> https://marc.info/?l=xen-devel&m=169154172700539
> >>
> >> So I think that there is an immediate RISC-V and X86 need.
> >>
> >> But the point about "moving the code now will mean moving part of it
> >> back in arm in the future" is valid. How do we move forward?
> >>
> >> I don't think we want to block Luca's progress to wait for more
> >> plumbings done on x86 or RISC-V. Also we don't want to scope creep
> >> Luca's series too much.
> >>
> >> But I think the goal should be to move dom0less code to xen/common as
> >> soon as possible and make it arch neutral. How do we get there?
> > 
> > So here is why I felt painful doing now a move to the common code, but maybe you (maintainers) can give me some
> > feedbacks.
> > 
> > I see that the functions that might be put in common are these, some of them however have arm specific code in them:
> > 
> > is_dom0less_mode
> > allocate_bank_memory
> > allocate_memory
> > handle_passthrough_prop
> > handle_prop_pfdt
> > scan_pfdt_node
> > check_partial_fdt
> > domain_p2m_pages
> > alloc_xenstore_evtchn
> > domain_handle_dtb_bootmodule (contains reference to the gic)
> > prepare_dtb_domU (have reference to psci, gic, vpl011)
> > construct_domU (have reference to vpl011, static shared memory)
> > create_domUs(have reference to vpl011, sve)
> > 
> > Here the functions that can stay in arm code:
> > 
> > make_gicv2_domU_node
> > make_gicv3_domU_node
> > make_gic_domU_node
> > make_vpl011_uart_node
> > 
> > 
> > Given that the status after the move to common of the above functions is not very clean, I’ve decided to don’t do that,
> > however if you are fine with it, I can do the modification and who is going to work further on the subject can consolidate
> > and make them build for other architecture.
> > 
> Another option would be to hold off for a while until work on hyperlaunch/RISCV dom0less starts to better understand the needs,
> concepts and to avoid multiple code movement which results in a horrible history. I know this is not nice but I can tell you that
> I had to stop working on some features like FLASK with dom0less, static domids for dom0less domUs, because according to the hyperlaunch design,
> this will need to be common. With hyperlaunch, everything starts with the domain configuration that is supposed to be arch neutral, so
> until this is done, it's difficult to do anything in this area.

This is not good. In an ideal world, Hyperlaunch shouldn't block
progress for dom0less. We shouldn't have to wait many months to make
progress on FLASK with dom0less, static domids for dom0less domUs, etc.
because of potential Hyperlaunch implications.

In my option a delay of few weeks might be OK; we should be reasonable.
But a delay of few months is not. Cosidering review times, release
schedules etc. it could become a very significant delay.

Also, hyperlaunch contributors are not familiar with dom0less and are
not familiar with arm. (This is so true that they have their own
reimplementation of the parser.) I think the dom0less separation / code
movement is better done by us in the arm community because we know the
code far better.

So I think Luca's suggestion above is in the right direction. I would
follow Luca's suggestion with only one difference: I would keep
prepare_dtb_domU in the arm code, together with make_gicv*_domU_node and
make_vpl011_uart_node. I would move the rest to common.
Julien Grall Oct. 3, 2023, 8:11 p.m. UTC | #10
Hi Stefano,

On 03/10/2023 20:52, Stefano Stabellini wrote:
> On Tue, 3 Oct 2023, Michal Orzel wrote:
>> On 03/10/2023 09:44, Luca Fancellu wrote:
>>> Given that the status after the move to common of the above functions is not very clean, I’ve decided to don’t do that,
>>> however if you are fine with it, I can do the modification and who is going to work further on the subject can consolidate
>>> and make them build for other architecture.
>>>
>> Another option would be to hold off for a while until work on hyperlaunch/RISCV dom0less starts to better understand the needs,
>> concepts and to avoid multiple code movement which results in a horrible history. I know this is not nice but I can tell you that
>> I had to stop working on some features like FLASK with dom0less, static domids for dom0less domUs, because according to the hyperlaunch design,
>> this will need to be common. With hyperlaunch, everything starts with the domain configuration that is supposed to be arch neutral, so
>> until this is done, it's difficult to do anything in this area.
> 
> This is not good. In an ideal world, Hyperlaunch shouldn't block
> progress for dom0less. We shouldn't have to wait many months to make
> progress on FLASK with dom0less, static domids for dom0less domUs, etc.
> because of potential Hyperlaunch implications.

It depends what are the implications. If it means that the bindings will 
change a release after. Then I think we should instead work on 
standardizing Hyperlaunch (or whichever name we decide to use) earlier 
so our users can rely on the interface for multiple revisions.

We could of course decide to maintain both interfaces. But this means 
more maintenance work which could have been avoided by fast tracking 
Hyperlaunch (it is not like we don't know it is coming...).

> In my option a delay of few weeks might be OK; we should be reasonable.
> But a delay of few months is not. Cosidering review times, release
> schedules etc. it could become a very significant delay. >
> Also, hyperlaunch contributors are not familiar with dom0less and are
> not familiar with arm. (This is so true that they have their own
> reimplementation of the parser.) I think the dom0less separation / code
> movement is better done by us in the arm community because we know the
> code far better.

I think we need both the arm and hyperlaunch community to work together 
(see more below).

> 
> So I think Luca's suggestion above is in the right direction. I would
> follow Luca's suggestion with only one difference: I would keep
> prepare_dtb_domU in the arm code, together with make_gicv*_domU_node and
> make_vpl011_uart_node. I would move the rest to common.

Luca's pointed out that some function (such as construct_domU) would 
contain reference to Arm specific code. So with your proposal, I am 
under the impression that we would move code that would then end up to 
be moved again in a few months time. So it is defeating your goal (even 
though the movement will hopefully be smaller).

As I wrote above, I don't think the Arm community alone is in the 
position to decide what should be in common and what should be in arch 
specific. We need the hyperlaunch community to agree on their approach 
so we can know which split makes sense. This is similar to the on-going 
MMU split to cater the MPU. We looked the MPU code to decide of the best 
split.

A potential approach would be to look at the RISC-V implementation of 
dom0less and see the common parts. But then we are going in the the 
scope creep you mention earlier.

So overall, I feel that Lucas' approach is better until Hyperlaunch gain 
momentum.

Cheers,
Stefano Stabellini Oct. 3, 2023, 8:40 p.m. UTC | #11
On Tue, 3 Oct 2023, Julien Grall wrote:
> On 03/10/2023 20:52, Stefano Stabellini wrote:
> > On Tue, 3 Oct 2023, Michal Orzel wrote:
> > > On 03/10/2023 09:44, Luca Fancellu wrote:
> > > > Given that the status after the move to common of the above functions is
> > > > not very clean, I’ve decided to don’t do that,
> > > > however if you are fine with it, I can do the modification and who is
> > > > going to work further on the subject can consolidate
> > > > and make them build for other architecture.
> > > > 
> > > Another option would be to hold off for a while until work on
> > > hyperlaunch/RISCV dom0less starts to better understand the needs,
> > > concepts and to avoid multiple code movement which results in a horrible
> > > history. I know this is not nice but I can tell you that
> > > I had to stop working on some features like FLASK with dom0less, static
> > > domids for dom0less domUs, because according to the hyperlaunch design,
> > > this will need to be common. With hyperlaunch, everything starts with the
> > > domain configuration that is supposed to be arch neutral, so
> > > until this is done, it's difficult to do anything in this area.
> > 
> > This is not good. In an ideal world, Hyperlaunch shouldn't block
> > progress for dom0less. We shouldn't have to wait many months to make
> > progress on FLASK with dom0less, static domids for dom0less domUs, etc.
> > because of potential Hyperlaunch implications.
> 
> It depends what are the implications. If it means that the bindings will
> change a release after. Then I think we should instead work on standardizing
> Hyperlaunch (or whichever name we decide to use) earlier so our users can rely
> on the interface for multiple revisions.
> 
> We could of course decide to maintain both interfaces. But this means more
> maintenance work which could have been avoided by fast tracking Hyperlaunch
> (it is not like we don't know it is coming...).
> 
> > In my option a delay of few weeks might be OK; we should be reasonable.
> > But a delay of few months is not. Cosidering review times, release
> > schedules etc. it could become a very significant delay. >
> > Also, hyperlaunch contributors are not familiar with dom0less and are
> > not familiar with arm. (This is so true that they have their own
> > reimplementation of the parser.) I think the dom0less separation / code
> > movement is better done by us in the arm community because we know the
> > code far better.
> 
> I think we need both the arm and hyperlaunch community to work together (see
> more below).
> 
> > 
> > So I think Luca's suggestion above is in the right direction. I would
> > follow Luca's suggestion with only one difference: I would keep
> > prepare_dtb_domU in the arm code, together with make_gicv*_domU_node and
> > make_vpl011_uart_node. I would move the rest to common.
> 
> Luca's pointed out that some function (such as construct_domU) would contain
> reference to Arm specific code. So with your proposal, I am under the
> impression that we would move code that would then end up to be moved again in
> a few months time. So it is defeating your goal (even though the movement will
> hopefully be smaller).

I was assuming that e.g. construct_domU would reference ARM code, but
the ARM code would live under arch/arm. So yes construct_domU would need
some refactoring in the future to make the call generic instead of
arm-specific, but wouldn't require significant additional code movement.

But maybe I am wrong. I don't know if Luca has a clearer picture in his
mind.


> As I wrote above, I don't think the Arm community alone is in the position to
> decide what should be in common and what should be in arch specific. We need
> the hyperlaunch community to agree on their approach so we can know which
> split makes sense. This is similar to the on-going MMU split to cater the MPU.
> We looked the MPU code to decide of the best split.
> 
> A potential approach would be to look at the RISC-V implementation of dom0less
> and see the common parts. But then we are going in the the scope creep you
> mention earlier.

I didn't do a proper investigation and I didn't look at the RISC-V or
hyperlaunch code in details. From my discussions with both groups, here
is my understanding of arm-specific things we currently have:

- 1:1 memory mapping / static memory / static heap because they are
  currently unimplemented on other arches
- most domU device tree building because most of virtual devices are
  different (timer, interrupt controller, uart, etc)
- the equivalent for dom0: the gic/timer device tree generated nodes
- the blacklist at the beginning of handle_node
- for hyperlaunch, we need to support "module-index" as a way to get the
  physical address of a module
- reserved_memory doesn't exist on x86
- a couple of dom0less device tree properties are arm-specific, such as
  "sve" and "vpl011"

This is me hand-waiving, so it might not be useful.


> So overall, I feel that Lucas' approach is better until Hyperlaunch gain
> momentum.

I am OK with Luca's original approach. I just wanted to open the
discussion in case there is a better way.
Julien Grall Oct. 4, 2023, 5:28 p.m. UTC | #12
Hi,

On 03/10/2023 21:40, Stefano Stabellini wrote:
> On Tue, 3 Oct 2023, Julien Grall wrote:
>> On 03/10/2023 20:52, Stefano Stabellini wrote:
>>> On Tue, 3 Oct 2023, Michal Orzel wrote:
>>>> On 03/10/2023 09:44, Luca Fancellu wrote:
>>>>> Given that the status after the move to common of the above functions is
>>>>> not very clean, I’ve decided to don’t do that,
>>>>> however if you are fine with it, I can do the modification and who is
>>>>> going to work further on the subject can consolidate
>>>>> and make them build for other architecture.
>>>>>
>>>> Another option would be to hold off for a while until work on
>>>> hyperlaunch/RISCV dom0less starts to better understand the needs,
>>>> concepts and to avoid multiple code movement which results in a horrible
>>>> history. I know this is not nice but I can tell you that
>>>> I had to stop working on some features like FLASK with dom0less, static
>>>> domids for dom0less domUs, because according to the hyperlaunch design,
>>>> this will need to be common. With hyperlaunch, everything starts with the
>>>> domain configuration that is supposed to be arch neutral, so
>>>> until this is done, it's difficult to do anything in this area.
>>>
>>> This is not good. In an ideal world, Hyperlaunch shouldn't block
>>> progress for dom0less. We shouldn't have to wait many months to make
>>> progress on FLASK with dom0less, static domids for dom0less domUs, etc.
>>> because of potential Hyperlaunch implications.
>>
>> It depends what are the implications. If it means that the bindings will
>> change a release after. Then I think we should instead work on standardizing
>> Hyperlaunch (or whichever name we decide to use) earlier so our users can rely
>> on the interface for multiple revisions.
>>
>> We could of course decide to maintain both interfaces. But this means more
>> maintenance work which could have been avoided by fast tracking Hyperlaunch
>> (it is not like we don't know it is coming...).
>>
>>> In my option a delay of few weeks might be OK; we should be reasonable.
>>> But a delay of few months is not. Cosidering review times, release
>>> schedules etc. it could become a very significant delay. >
>>> Also, hyperlaunch contributors are not familiar with dom0less and are
>>> not familiar with arm. (This is so true that they have their own
>>> reimplementation of the parser.) I think the dom0less separation / code
>>> movement is better done by us in the arm community because we know the
>>> code far better.
>>
>> I think we need both the arm and hyperlaunch community to work together (see
>> more below).
>>
>>>
>>> So I think Luca's suggestion above is in the right direction. I would
>>> follow Luca's suggestion with only one difference: I would keep
>>> prepare_dtb_domU in the arm code, together with make_gicv*_domU_node and
>>> make_vpl011_uart_node. I would move the rest to common.
>>
>> Luca's pointed out that some function (such as construct_domU) would contain
>> reference to Arm specific code. So with your proposal, I am under the
>> impression that we would move code that would then end up to be moved again in
>> a few months time. So it is defeating your goal (even though the movement will
>> hopefully be smaller).
> 
> I was assuming that e.g. construct_domU would reference ARM code, but
> the ARM code would live under arch/arm. So yes construct_domU would need
> some refactoring in the future to make the call generic instead of
> arm-specific, but wouldn't require significant additional code movement.

This is not really great. We could end up to have an extension of Arm in 
common for multiple release (the timeline for hyperlaunch is known). If 
you want to move the code now, then it would be preferable that we 
abstract the calls (e.g. arch_construct_domU()) at least have a sensible 
interface.

>  >> As I wrote above, I don't think the Arm community alone is in the 
position to
>> decide what should be in common and what should be in arch specific. We need
>> the hyperlaunch community to agree on their approach so we can know which
>> split makes sense. This is similar to the on-going MMU split to cater the MPU.
>> We looked the MPU code to decide of the best split.
>>
>> A potential approach would be to look at the RISC-V implementation of dom0less
>> and see the common parts. But then we are going in the the scope creep you
>> mention earlier.
> 
> I didn't do a proper investigation and I didn't look at the RISC-V or
> hyperlaunch code in details. From my discussions with both groups, here
> is my understanding of arm-specific things we currently have:
> 
> - 1:1 memory mapping / static memory / static heap because they are
>    currently unimplemented on other arches

But they might. So this is the sort of feature that would want to be 
implemented in common.

> - most domU device tree building because most of virtual devices are
>    different (timer, interrupt controller, uart, etc)

Make sense for this one

> - the equivalent for dom0: the gic/timer device tree generated nodes

and this one.

> - the blacklist at the beginning of handle_node

Hmmm... I would expect this to be useful at least for RISC-V as surely 
they want to hide some device. Maybe this could be abstract.

> - for hyperlaunch, we need to support "module-index" as a way to get the
>    physical address of a module
> - reserved_memory doesn't exist on x86

Are you referring to the variable? If so, the concept of reserved region 
would likely exists for other architecture. So this is a feature that 
would need to be abstracted so it can be re-used.

> - a couple of dom0less device tree properties are arm-specific, such as
>    "sve" and "vpl011"
> 
> This is me hand-waiving, so it might not be useful.
> 
> 
>> So overall, I feel that Lucas' approach is better until Hyperlaunch gain
>> momentum.
> 
> I am OK with Luca's original approach. I just wanted to open the
> discussion in case there is a better way.

I don't particular mind moving the code to common. But it needs to be 
done correctly (i.e. not common/<foo>.c be a simple extension of Arm). 
We also need to agree on a name for the file/directory (and most likely 
of the feature) to avoid another renaming.

Cheers,
Stefano Stabellini Oct. 4, 2023, 10:59 p.m. UTC | #13
On Wed, 4 Oct 2023, Julien Grall wrote:
> On 03/10/2023 21:40, Stefano Stabellini wrote:
> > On Tue, 3 Oct 2023, Julien Grall wrote:
> > > On 03/10/2023 20:52, Stefano Stabellini wrote:
> > > > On Tue, 3 Oct 2023, Michal Orzel wrote:
> > > > > On 03/10/2023 09:44, Luca Fancellu wrote:
> > > > > > Given that the status after the move to common of the above
> > > > > > functions is
> > > > > > not very clean, I’ve decided to don’t do that,
> > > > > > however if you are fine with it, I can do the modification and who
> > > > > > is
> > > > > > going to work further on the subject can consolidate
> > > > > > and make them build for other architecture.
> > > > > > 
> > > > > Another option would be to hold off for a while until work on
> > > > > hyperlaunch/RISCV dom0less starts to better understand the needs,
> > > > > concepts and to avoid multiple code movement which results in a
> > > > > horrible
> > > > > history. I know this is not nice but I can tell you that
> > > > > I had to stop working on some features like FLASK with dom0less,
> > > > > static
> > > > > domids for dom0less domUs, because according to the hyperlaunch
> > > > > design,
> > > > > this will need to be common. With hyperlaunch, everything starts with
> > > > > the
> > > > > domain configuration that is supposed to be arch neutral, so
> > > > > until this is done, it's difficult to do anything in this area.
> > > > 
> > > > This is not good. In an ideal world, Hyperlaunch shouldn't block
> > > > progress for dom0less. We shouldn't have to wait many months to make
> > > > progress on FLASK with dom0less, static domids for dom0less domUs, etc.
> > > > because of potential Hyperlaunch implications.
> > > 
> > > It depends what are the implications. If it means that the bindings will
> > > change a release after. Then I think we should instead work on
> > > standardizing
> > > Hyperlaunch (or whichever name we decide to use) earlier so our users can
> > > rely
> > > on the interface for multiple revisions.
> > > 
> > > We could of course decide to maintain both interfaces. But this means more
> > > maintenance work which could have been avoided by fast tracking
> > > Hyperlaunch
> > > (it is not like we don't know it is coming...).
> > > 
> > > > In my option a delay of few weeks might be OK; we should be reasonable.
> > > > But a delay of few months is not. Cosidering review times, release
> > > > schedules etc. it could become a very significant delay. >
> > > > Also, hyperlaunch contributors are not familiar with dom0less and are
> > > > not familiar with arm. (This is so true that they have their own
> > > > reimplementation of the parser.) I think the dom0less separation / code
> > > > movement is better done by us in the arm community because we know the
> > > > code far better.
> > > 
> > > I think we need both the arm and hyperlaunch community to work together
> > > (see
> > > more below).
> > > 
> > > > 
> > > > So I think Luca's suggestion above is in the right direction. I would
> > > > follow Luca's suggestion with only one difference: I would keep
> > > > prepare_dtb_domU in the arm code, together with make_gicv*_domU_node and
> > > > make_vpl011_uart_node. I would move the rest to common.
> > > 
> > > Luca's pointed out that some function (such as construct_domU) would
> > > contain
> > > reference to Arm specific code. So with your proposal, I am under the
> > > impression that we would move code that would then end up to be moved
> > > again in
> > > a few months time. So it is defeating your goal (even though the movement
> > > will
> > > hopefully be smaller).
> > 
> > I was assuming that e.g. construct_domU would reference ARM code, but
> > the ARM code would live under arch/arm. So yes construct_domU would need
> > some refactoring in the future to make the call generic instead of
> > arm-specific, but wouldn't require significant additional code movement.
> 
> This is not really great. We could end up to have an extension of Arm in
> common for multiple release (the timeline for hyperlaunch is known). If you
> want to move the code now, then it would be preferable that we abstract the
> calls (e.g. arch_construct_domU()) at least have a sensible interface.
> 
> >  >> As I wrote above, I don't think the Arm community alone is in the 
> position to
> > > decide what should be in common and what should be in arch specific. We
> > > need
> > > the hyperlaunch community to agree on their approach so we can know which
> > > split makes sense. This is similar to the on-going MMU split to cater the
> > > MPU.
> > > We looked the MPU code to decide of the best split.
> > > 
> > > A potential approach would be to look at the RISC-V implementation of
> > > dom0less
> > > and see the common parts. But then we are going in the the scope creep you
> > > mention earlier.
> > 
> > I didn't do a proper investigation and I didn't look at the RISC-V or
> > hyperlaunch code in details. From my discussions with both groups, here
> > is my understanding of arm-specific things we currently have:
> > 
> > - 1:1 memory mapping / static memory / static heap because they are
> >    currently unimplemented on other arches
> 
> But they might. So this is the sort of feature that would want to be
> implemented in common.
> 
> > - most domU device tree building because most of virtual devices are
> >    different (timer, interrupt controller, uart, etc)
> 
> Make sense for this one
> 
> > - the equivalent for dom0: the gic/timer device tree generated nodes
> 
> and this one.
> 
> > - the blacklist at the beginning of handle_node
> 
> Hmmm... I would expect this to be useful at least for RISC-V as surely they
> want to hide some device. Maybe this could be abstract.
> 
> > - for hyperlaunch, we need to support "module-index" as a way to get the
> >    physical address of a module
> > - reserved_memory doesn't exist on x86
> 
> Are you referring to the variable? If so, the concept of reserved region would
> likely exists for other architecture. So this is a feature that would need to
> be abstracted so it can be re-used.

Yes, you are right. It doesn't seem that there is an easy way to
abstract these things.


> > - a couple of dom0less device tree properties are arm-specific, such as
> >    "sve" and "vpl011"
> > 
> > This is me hand-waiving, so it might not be useful.
> > 
> > 
> > > So overall, I feel that Lucas' approach is better until Hyperlaunch gain
> > > momentum.
> > 
> > I am OK with Luca's original approach. I just wanted to open the
> > discussion in case there is a better way.
> 
> I don't particular mind moving the code to common. But it needs to be done
> correctly (i.e. not common/<foo>.c be a simple extension of Arm). We also need
> to agree on a name for the file/directory (and most likely of the feature) to
> avoid another renaming.

I think the best way forward is to keep Luca's series as is because
anything else would require a significant amount of work. There is no
easy way to properly abstract the things listed above (1:1 memory
mappings, handle_node blacklist etc.) Still extracting dom0less out of
domain_build.c is a very good step in the right direction and I think we
should take it.
Julien Grall Nov. 1, 2023, 6:51 p.m. UTC | #14
Hi Luca,

On 27/09/2023 15:01, Luca Fancellu wrote:
> Currently the dom0less feature code is mostly inside domain_build.c
> and setup.c, it is a feature that may not be useful to everyone so
> put the code in a different compilation module in order to make it
> easier to disable the feature in the future.
> 
> Move gic_interrupt_t in domain_build.h to use it with the function
> declaration, move its comment above the declaration.
> 
> The following functions are now visible externally from domain_build
> because they are used also from the dom0less-build module:
>   - get_allocation_size
>   - set_interrupt
>   - domain_fdt_begin_node
>   - make_memory_node
>   - make_resv_memory_node
>   - make_hypervisor_node
>   - make_psci_node
>   - make_cpus_node
>   - make_timer_node
>   - handle_device_interrupts
>   - construct_domain
>   - process_shm
> 
> The functions allocate_static_memory and assign_static_memory_11
> are now externally visible, so put their declarations into
> domain_build.h and move the #else and stub definition in the header
> as well.
> 
> Move is_dom0less_mode from setup.c to dom0less-build.c and make it
> externally visible.
> 
> Where spotted, fix code style issues.
> 
> No functional change is intended.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> ---
>   xen/arch/arm/Makefile                     |    1 +
>   xen/arch/arm/dom0less-build.c             | 1087 ++++++++++++++++++
>   xen/arch/arm/domain_build.c               | 1276 ++-------------------
>   xen/arch/arm/include/asm/dom0less-build.h |   25 +
>   xen/arch/arm/include/asm/domain_build.h   |   58 +
>   xen/arch/arm/include/asm/setup.h          |    1 -
>   xen/arch/arm/setup.c                      |   33 +-
>   7 files changed, 1288 insertions(+), 1193 deletions(-)
>   create mode 100644 xen/arch/arm/dom0less-build.c
>   create mode 100644 xen/arch/arm/include/asm/dom0less-build.h
> 
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index 81c31c36fc3d..70dd7201ef30 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -15,6 +15,7 @@ obj-y += cpufeature.o
>   obj-y += decode.o
>   obj-y += device.o
>   obj-$(CONFIG_IOREQ_SERVER) += dm.o
> +obj-y += dom0less-build.init.o
>   obj-y += domain.o
>   obj-y += domain_build.init.o
>   obj-$(CONFIG_ARCH_MAP_DOMAIN_PAGE) += domain_page.o
> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> new file mode 100644
> index 000000000000..dc9c90cf00a7
> --- /dev/null
> +++ b/xen/arch/arm/dom0less-build.c
> @@ -0,0 +1,1087 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * xen/arch/arm/dom0less-build.c
> + *
> + * Code related to the dom0less functionality
> + *
> + * Copyright (C) 2023 Arm Ltd.

This feels a bit odd to add your copyright here. It sounds like Arm 
wrote all the code, but you only moved. That said, I am not a lawyer.

> + */
> +
> +#include <xen/device_tree.h>
> +#include <xen/err.h>
> +#include <xen/event.h>
> +#include <xen/grant_table.h>
> +#include <xen/iocap.h>
> +#include <xen/libfdt/libfdt.h>
> +#include <xen/sched.h>
> +#include <xen/serial.h>
> +#include <xen/sizes.h>
> +#include <xen/vmap.h>
> +
> +#include <asm/arm64/sve.h>
> +#include <asm/dom0less-build.h>
> +#include <asm/domain_build.h>
> +
> +bool __init is_dom0less_mode(void)
> +{
> +    struct bootmodules *mods = &bootinfo.modules;
> +    struct bootmodule *mod;
> +    unsigned int i;
> +    bool dom0found = false;
> +    bool domUfound = false;
> +
> +    /* Look into the bootmodules */
> +    for ( i = 0 ; i < mods->nr_mods ; i++ )
> +    {
> +        mod = &mods->module[i];
> +        /* Find if dom0 and domU kernels are present */
> +        if ( mod->kind == BOOTMOD_KERNEL )
> +        {
> +            if ( mod->domU == false )
> +            {
> +                dom0found = true;
> +                break;
> +            }
> +            else
> +                domUfound = true;
> +        }
> +    }
> +
> +    /*
> +     * If there is no dom0 kernel but at least one domU, then we are in
> +     * dom0less mode
> +     */
> +    return ( !dom0found && domUfound );
> +}
> +
> +static bool __init allocate_bank_memory(struct domain *d,
> +                                        struct kernel_info *kinfo,
> +                                        gfn_t sgfn,
> +                                        paddr_t tot_size)

I understand that this is today only used by domUs. However, we could 
technically use it for allocating dom0 memory if it is not 1:1.

So I think this function should stay in domain_build.c.

The rest of the code movement looks alright to me.

Cheers,
Luca Fancellu Nov. 2, 2023, 11:15 a.m. UTC | #15
Hi Julien,

>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
>> new file mode 100644
>> index 000000000000..dc9c90cf00a7
>> --- /dev/null
>> +++ b/xen/arch/arm/dom0less-build.c
>> @@ -0,0 +1,1087 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * xen/arch/arm/dom0less-build.c
>> + *
>> + * Code related to the dom0less functionality
>> + *
>> + * Copyright (C) 2023 Arm Ltd.
> 
> This feels a bit odd to add your copyright here. It sounds like Arm wrote all the code, but you only moved. That said, I am not a lawyer.

Mmm yes, we should be required to put our copyright in everything we create or modify or touch,
said that, I could modify that to be like this:

/*
 * xen/arch/arm/dom0less-build.c 
 *
 * Code related to the dom0less functionality
 *
 * Original code from xen/arch/arm/domain_build.c
 * Modifications: Copyright (C) 2023 Arm Ltd.
 */

What do you think?

> 
>> + */
>> +
>> +#include <xen/device_tree.h>
>> +#include <xen/err.h>
>> +#include <xen/event.h>
>> +#include <xen/grant_table.h>
>> +#include <xen/iocap.h>
>> +#include <xen/libfdt/libfdt.h>
>> +#include <xen/sched.h>
>> +#include <xen/serial.h>
>> +#include <xen/sizes.h>
>> +#include <xen/vmap.h>
>> +
>> +#include <asm/arm64/sve.h>
>> +#include <asm/dom0less-build.h>
>> +#include <asm/domain_build.h>
>> +
>> +bool __init is_dom0less_mode(void)
>> +{
>> +    struct bootmodules *mods = &bootinfo.modules;
>> +    struct bootmodule *mod;
>> +    unsigned int i;
>> +    bool dom0found = false;
>> +    bool domUfound = false;
>> +
>> +    /* Look into the bootmodules */
>> +    for ( i = 0 ; i < mods->nr_mods ; i++ )
>> +    {
>> +        mod = &mods->module[i];
>> +        /* Find if dom0 and domU kernels are present */
>> +        if ( mod->kind == BOOTMOD_KERNEL )
>> +        {
>> +            if ( mod->domU == false )
>> +            {
>> +                dom0found = true;
>> +                break;
>> +            }
>> +            else
>> +                domUfound = true;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * If there is no dom0 kernel but at least one domU, then we are in
>> +     * dom0less mode
>> +     */
>> +    return ( !dom0found && domUfound );
>> +}
>> +
>> +static bool __init allocate_bank_memory(struct domain *d,
>> +                                        struct kernel_info *kinfo,
>> +                                        gfn_t sgfn,
>> +                                        paddr_t tot_size)
> 
> I understand that this is today only used by domUs. However, we could technically use it for allocating dom0 memory if it is not 1:1.
> 
> So I think this function should stay in domain_build.c.

Ok I can leave it there, is it ok for you if I protect it using the KConfig in domain_build.c since that code is currently not used apart from dom0less?
(I will do that in the last patch if you agree)

> 
> The rest of the code movement looks alright to me.
> 
> Cheers,
> 
> -- 
> Julien Grall
Julien Grall Nov. 2, 2023, 11:28 a.m. UTC | #16
On 02/11/2023 11:15, Luca Fancellu wrote:
> 
> Hi Julien,
> 
>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
>>> new file mode 100644
>>> index 000000000000..dc9c90cf00a7
>>> --- /dev/null
>>> +++ b/xen/arch/arm/dom0less-build.c
>>> @@ -0,0 +1,1087 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>> +/*
>>> + * xen/arch/arm/dom0less-build.c
>>> + *
>>> + * Code related to the dom0less functionality
>>> + *
>>> + * Copyright (C) 2023 Arm Ltd.
>>
>> This feels a bit odd to add your copyright here. It sounds like Arm wrote all the code, but you only moved. That said, I am not a lawyer.
> 
> Mmm yes, we should be required to put our copyright in everything we create or modify or touch,
> said that, I could modify that to be like this:

This is debatable. IIRC we discussed this in a recent-ish community 
call. From my understanding, we are tracking the authorship/copyright 
using signed-off-by/author in the commit message.

> 
> /*
>   * xen/arch/arm/dom0less-build.c
>   *
>   * Code related to the dom0less functionality
>   *
>   * Original code from xen/arch/arm/domain_build.c
>   * Modifications: Copyright (C) 2023 Arm Ltd.
>   */
> 
> What do you think?

TBH, it is not clear to me what sort of modifications you did that would 
require a copyright from Arm. AFAICT, you only moved the code.

Anyway, I will let the others express their views here.

> 
>>
>>> + */
>>> +
>>> +#include <xen/device_tree.h>
>>> +#include <xen/err.h>
>>> +#include <xen/event.h>
>>> +#include <xen/grant_table.h>
>>> +#include <xen/iocap.h>
>>> +#include <xen/libfdt/libfdt.h>
>>> +#include <xen/sched.h>
>>> +#include <xen/serial.h>
>>> +#include <xen/sizes.h>
>>> +#include <xen/vmap.h>
>>> +
>>> +#include <asm/arm64/sve.h>
>>> +#include <asm/dom0less-build.h>
>>> +#include <asm/domain_build.h>
>>> +
>>> +bool __init is_dom0less_mode(void)
>>> +{
>>> +    struct bootmodules *mods = &bootinfo.modules;
>>> +    struct bootmodule *mod;
>>> +    unsigned int i;
>>> +    bool dom0found = false;
>>> +    bool domUfound = false;
>>> +
>>> +    /* Look into the bootmodules */
>>> +    for ( i = 0 ; i < mods->nr_mods ; i++ )
>>> +    {
>>> +        mod = &mods->module[i];
>>> +        /* Find if dom0 and domU kernels are present */
>>> +        if ( mod->kind == BOOTMOD_KERNEL )
>>> +        {
>>> +            if ( mod->domU == false )
>>> +            {
>>> +                dom0found = true;
>>> +                break;
>>> +            }
>>> +            else
>>> +                domUfound = true;
>>> +        }
>>> +    }
>>> +
>>> +    /*
>>> +     * If there is no dom0 kernel but at least one domU, then we are in
>>> +     * dom0less mode
>>> +     */
>>> +    return ( !dom0found && domUfound );
>>> +}
>>> +
>>> +static bool __init allocate_bank_memory(struct domain *d,
>>> +                                        struct kernel_info *kinfo,
>>> +                                        gfn_t sgfn,
>>> +                                        paddr_t tot_size)
>>
>> I understand that this is today only used by domUs. However, we could technically use it for allocating dom0 memory if it is not 1:1.
>>
>> So I think this function should stay in domain_build.c.
> 
> Ok I can leave it there, is it ok for you if I protect it using the KConfig in domain_build.c since that code is currently not used apart from dom0less?
> (I will do that in the last patch if you agree)

I am fine with that.

Cheers,
diff mbox series

Patch

diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 81c31c36fc3d..70dd7201ef30 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -15,6 +15,7 @@  obj-y += cpufeature.o
 obj-y += decode.o
 obj-y += device.o
 obj-$(CONFIG_IOREQ_SERVER) += dm.o
+obj-y += dom0less-build.init.o
 obj-y += domain.o
 obj-y += domain_build.init.o
 obj-$(CONFIG_ARCH_MAP_DOMAIN_PAGE) += domain_page.o
diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
new file mode 100644
index 000000000000..dc9c90cf00a7
--- /dev/null
+++ b/xen/arch/arm/dom0less-build.c
@@ -0,0 +1,1087 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * xen/arch/arm/dom0less-build.c
+ *
+ * Code related to the dom0less functionality
+ *
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#include <xen/device_tree.h>
+#include <xen/err.h>
+#include <xen/event.h>
+#include <xen/grant_table.h>
+#include <xen/iocap.h>
+#include <xen/libfdt/libfdt.h>
+#include <xen/sched.h>
+#include <xen/serial.h>
+#include <xen/sizes.h>
+#include <xen/vmap.h>
+
+#include <asm/arm64/sve.h>
+#include <asm/dom0less-build.h>
+#include <asm/domain_build.h>
+
+bool __init is_dom0less_mode(void)
+{
+    struct bootmodules *mods = &bootinfo.modules;
+    struct bootmodule *mod;
+    unsigned int i;
+    bool dom0found = false;
+    bool domUfound = false;
+
+    /* Look into the bootmodules */
+    for ( i = 0 ; i < mods->nr_mods ; i++ )
+    {
+        mod = &mods->module[i];
+        /* Find if dom0 and domU kernels are present */
+        if ( mod->kind == BOOTMOD_KERNEL )
+        {
+            if ( mod->domU == false )
+            {
+                dom0found = true;
+                break;
+            }
+            else
+                domUfound = true;
+        }
+    }
+
+    /*
+     * If there is no dom0 kernel but at least one domU, then we are in
+     * dom0less mode
+     */
+    return ( !dom0found && domUfound );
+}
+
+static bool __init allocate_bank_memory(struct domain *d,
+                                        struct kernel_info *kinfo,
+                                        gfn_t sgfn,
+                                        paddr_t tot_size)
+{
+    int res;
+    struct page_info *pg;
+    struct membank *bank;
+    unsigned int max_order = ~0;
+
+    /*
+     * allocate_bank_memory can be called with a tot_size of zero for
+     * the second memory bank. It is not an error and we can safely
+     * avoid creating a zero-size memory bank.
+     */
+    if ( tot_size == 0 )
+        return true;
+
+    bank = &kinfo->mem.bank[kinfo->mem.nr_banks];
+    bank->start = gfn_to_gaddr(sgfn);
+    bank->size = tot_size;
+
+    while ( tot_size > 0 )
+    {
+        unsigned int order = get_allocation_size(tot_size);
+
+        order = min(max_order, order);
+
+        pg = alloc_domheap_pages(d, order, 0);
+        if ( !pg )
+        {
+            /*
+             * If we can't allocate one page, then it is unlikely to
+             * succeed in the next iteration. So bail out.
+             */
+            if ( !order )
+                return false;
+
+            /*
+             * If we can't allocate memory with order, then it is
+             * unlikely to succeed in the next iteration.
+             * Record the order - 1 to avoid re-trying.
+             */
+            max_order = order - 1;
+            continue;
+        }
+
+        res = guest_physmap_add_page(d, sgfn, page_to_mfn(pg), order);
+        if ( res )
+        {
+            dprintk(XENLOG_ERR, "Failed map pages to DOMU: %d", res);
+            return false;
+        }
+
+        sgfn = gfn_add(sgfn, 1UL << order);
+        tot_size -= (1ULL << (PAGE_SHIFT + order));
+    }
+
+    kinfo->mem.nr_banks++;
+    kinfo->unassigned_mem -= bank->size;
+
+    return true;
+}
+
+static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo)
+{
+    unsigned int i;
+    paddr_t bank_size;
+
+    printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n",
+           /* Don't want format this as PRIpaddr (16 digit hex) */
+           (unsigned long)(kinfo->unassigned_mem >> 20), d);
+
+    kinfo->mem.nr_banks = 0;
+    bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem);
+    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM0_BASE),
+                               bank_size) )
+        goto fail;
+
+    bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem);
+    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM1_BASE),
+                               bank_size) )
+        goto fail;
+
+    if ( kinfo->unassigned_mem )
+        goto fail;
+
+    for( i = 0; i < kinfo->mem.nr_banks; i++ )
+    {
+        printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
+               d,
+               i,
+               kinfo->mem.bank[i].start,
+               kinfo->mem.bank[i].start + kinfo->mem.bank[i].size,
+               /* Don't want format this as PRIpaddr (16 digit hex) */
+               (unsigned long)(kinfo->mem.bank[i].size >> 20));
+    }
+
+    return;
+
+fail:
+    panic("Failed to allocate requested domain memory."
+          /* Don't want format this as PRIpaddr (16 digit hex) */
+          " %ldKB unallocated. Fix the VMs configurations.\n",
+          (unsigned long)kinfo->unassigned_mem >> 10);
+}
+
+#ifdef CONFIG_VGICV2
+static int __init make_gicv2_domU_node(struct kernel_info *kinfo)
+{
+    void *fdt = kinfo->fdt;
+    int res = 0;
+    __be32 reg[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) * 2];
+    __be32 *cells;
+    const struct domain *d = kinfo->d;
+
+    res = domain_fdt_begin_node(fdt, "interrupt-controller",
+                                vgic_dist_base(&d->arch.vgic));
+    if ( res )
+        return res;
+
+    res = fdt_property_cell(fdt, "#address-cells", 0);
+    if ( res )
+        return res;
+
+    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
+    if ( res )
+        return res;
+
+    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
+    if ( res )
+        return res;
+
+    res = fdt_property_string(fdt, "compatible", "arm,gic-400");
+    if ( res )
+        return res;
+
+    cells = &reg[0];
+    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+                       vgic_dist_base(&d->arch.vgic), GUEST_GICD_SIZE);
+    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+                       vgic_cpu_base(&d->arch.vgic), GUEST_GICC_SIZE);
+
+    res = fdt_property(fdt, "reg", reg, sizeof(reg));
+    if (res)
+        return res;
+
+    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
+    if (res)
+        return res;
+
+    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
+    if (res)
+        return res;
+
+    res = fdt_end_node(fdt);
+
+    return res;
+}
+#endif
+
+#ifdef CONFIG_GICV3
+static int __init make_gicv3_domU_node(struct kernel_info *kinfo)
+{
+    void *fdt = kinfo->fdt;
+    int res = 0;
+    __be32 *reg, *cells;
+    const struct domain *d = kinfo->d;
+    unsigned int i, len = 0;
+
+    res = domain_fdt_begin_node(fdt, "interrupt-controller",
+                                vgic_dist_base(&d->arch.vgic));
+    if ( res )
+        return res;
+
+    res = fdt_property_cell(fdt, "#address-cells", 0);
+    if ( res )
+        return res;
+
+    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
+    if ( res )
+        return res;
+
+    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
+    if ( res )
+        return res;
+
+    res = fdt_property_string(fdt, "compatible", "arm,gic-v3");
+    if ( res )
+        return res;
+
+    /* reg specifies all re-distributors and Distributor. */
+    len = (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
+          (d->arch.vgic.nr_regions + 1) * sizeof(__be32);
+    reg = xmalloc_bytes(len);
+    if ( reg == NULL )
+        return -ENOMEM;
+    cells = reg;
+
+    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+                       vgic_dist_base(&d->arch.vgic), GUEST_GICV3_GICD_SIZE);
+
+    for ( i = 0; i < d->arch.vgic.nr_regions; i++ )
+        dt_child_set_range(&cells,
+                           GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+                           d->arch.vgic.rdist_regions[i].base,
+                           d->arch.vgic.rdist_regions[i].size);
+
+    res = fdt_property(fdt, "reg", reg, len);
+    xfree(reg);
+    if (res)
+        return res;
+
+    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
+    if (res)
+        return res;
+
+    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
+    if (res)
+        return res;
+
+    res = fdt_end_node(fdt);
+
+    return res;
+}
+#endif
+
+static int __init make_gic_domU_node(struct kernel_info *kinfo)
+{
+    switch ( kinfo->d->arch.vgic.version )
+    {
+#ifdef CONFIG_GICV3
+    case GIC_V3:
+        return make_gicv3_domU_node(kinfo);
+#endif
+#ifdef CONFIG_VGICV2
+    case GIC_V2:
+        return make_gicv2_domU_node(kinfo);
+#endif
+    default:
+        panic("Unsupported GIC version\n");
+    }
+}
+
+#ifdef CONFIG_SBSA_VUART_CONSOLE
+static int __init make_vpl011_uart_node(struct kernel_info *kinfo)
+{
+    void *fdt = kinfo->fdt;
+    int res;
+    gic_interrupt_t intr;
+    __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
+    __be32 *cells;
+    struct domain *d = kinfo->d;
+
+    res = domain_fdt_begin_node(fdt, "sbsa-uart", d->arch.vpl011.base_addr);
+    if ( res )
+        return res;
+
+    res = fdt_property_string(fdt, "compatible", "arm,sbsa-uart");
+    if ( res )
+        return res;
+
+    cells = &reg[0];
+    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS,
+                       GUEST_ROOT_SIZE_CELLS, d->arch.vpl011.base_addr,
+                       GUEST_PL011_SIZE);
+
+    res = fdt_property(fdt, "reg", reg, sizeof(reg));
+    if ( res )
+        return res;
+
+    set_interrupt(intr, d->arch.vpl011.virq, 0xf, DT_IRQ_TYPE_LEVEL_HIGH);
+
+    res = fdt_property(fdt, "interrupts", intr, sizeof (intr));
+    if ( res )
+        return res;
+
+    res = fdt_property_cell(fdt, "interrupt-parent",
+                            kinfo->phandle_gic);
+    if ( res )
+        return res;
+
+    /* Use a default baud rate of 115200. */
+    fdt_property_u32(fdt, "current-speed", 115200);
+
+    res = fdt_end_node(fdt);
+    if ( res )
+        return res;
+
+    return 0;
+}
+#endif
+
+/*
+ * Scan device tree properties for passthrough specific information.
+ * Returns < 0 on error
+ *         0 on success
+ */
+static int __init handle_passthrough_prop(struct kernel_info *kinfo,
+                                          const struct fdt_property *xen_reg,
+                                          const struct fdt_property *xen_path,
+                                          bool xen_force,
+                                          uint32_t address_cells,
+                                          uint32_t size_cells)
+{
+    const __be32 *cell;
+    unsigned int i, len;
+    struct dt_device_node *node;
+    int res;
+    paddr_t mstart, size, gstart;
+
+    /* xen,reg specifies where to map the MMIO region */
+    cell = (const __be32 *)xen_reg->data;
+    len = fdt32_to_cpu(xen_reg->len) / ((address_cells * 2 + size_cells) *
+                                        sizeof(uint32_t));
+
+    for ( i = 0; i < len; i++ )
+    {
+        device_tree_get_reg(&cell, address_cells, size_cells,
+                            &mstart, &size);
+        gstart = dt_next_cell(address_cells, &cell);
+
+        if ( gstart & ~PAGE_MASK || mstart & ~PAGE_MASK || size & ~PAGE_MASK )
+        {
+            printk(XENLOG_ERR
+                   "DomU passthrough config has not page aligned addresses/sizes\n");
+            return -EINVAL;
+        }
+
+        res = iomem_permit_access(kinfo->d, paddr_to_pfn(mstart),
+                                  paddr_to_pfn(PAGE_ALIGN(mstart + size - 1)));
+        if ( res )
+        {
+            printk(XENLOG_ERR "Unable to permit to dom%d access to"
+                   " 0x%"PRIpaddr" - 0x%"PRIpaddr"\n",
+                   kinfo->d->domain_id,
+                   mstart & PAGE_MASK, PAGE_ALIGN(mstart + size) - 1);
+            return res;
+        }
+
+        res = map_regions_p2mt(kinfo->d,
+                               gaddr_to_gfn(gstart),
+                               PFN_DOWN(size),
+                               maddr_to_mfn(mstart),
+                               p2m_mmio_direct_dev);
+        if ( res < 0 )
+        {
+            printk(XENLOG_ERR
+                   "Failed to map %"PRIpaddr" to the guest at%"PRIpaddr"\n",
+                   mstart, gstart);
+            return -EFAULT;
+        }
+    }
+
+    /*
+     * If xen_force, we let the user assign a MMIO region with no
+     * associated path.
+     */
+    if ( xen_path == NULL )
+        return xen_force ? 0 : -EINVAL;
+
+    /*
+     * xen,path specifies the corresponding node in the host DT.
+     * Both interrupt mappings and IOMMU settings are based on it,
+     * as they are done based on the corresponding host DT node.
+     */
+    node = dt_find_node_by_path(xen_path->data);
+    if ( node == NULL )
+    {
+        printk(XENLOG_ERR "Couldn't find node %s in host_dt!\n",
+               (char *)xen_path->data);
+        return -EINVAL;
+    }
+
+    res = map_device_irqs_to_domain(kinfo->d, node, true, NULL);
+    if ( res < 0 )
+        return res;
+
+    res = iommu_add_dt_device(node);
+    if ( res < 0 )
+        return res;
+
+    /* If xen_force, we allow assignment of devices without IOMMU protection. */
+    if ( xen_force && !dt_device_is_protected(node) )
+        return 0;
+
+    return iommu_assign_dt_device(kinfo->d, node);
+}
+
+static int __init handle_prop_pfdt(struct kernel_info *kinfo,
+                                   const void *pfdt, int nodeoff,
+                                   uint32_t address_cells, uint32_t size_cells,
+                                   bool scan_passthrough_prop)
+{
+    void *fdt = kinfo->fdt;
+    int propoff, nameoff, res;
+    const struct fdt_property *prop, *xen_reg = NULL, *xen_path = NULL;
+    const char *name;
+    bool found, xen_force = false;
+
+    for ( propoff = fdt_first_property_offset(pfdt, nodeoff);
+          propoff >= 0;
+          propoff = fdt_next_property_offset(pfdt, propoff) )
+    {
+        if ( !(prop = fdt_get_property_by_offset(pfdt, propoff, NULL)) )
+            return -FDT_ERR_INTERNAL;
+
+        found = false;
+        nameoff = fdt32_to_cpu(prop->nameoff);
+        name = fdt_string(pfdt, nameoff);
+
+        if ( scan_passthrough_prop )
+        {
+            if ( dt_prop_cmp("xen,reg", name) == 0 )
+            {
+                xen_reg = prop;
+                found = true;
+            }
+            else if ( dt_prop_cmp("xen,path", name) == 0 )
+            {
+                xen_path = prop;
+                found = true;
+            }
+            else if ( dt_prop_cmp("xen,force-assign-without-iommu",
+                                  name) == 0 )
+            {
+                xen_force = true;
+                found = true;
+            }
+        }
+
+        /*
+         * Copy properties other than the ones above: xen,reg, xen,path,
+         * and xen,force-assign-without-iommu.
+         */
+        if ( !found )
+        {
+            res = fdt_property(fdt, name, prop->data, fdt32_to_cpu(prop->len));
+            if ( res )
+                return res;
+        }
+    }
+
+    /*
+     * Only handle passthrough properties if both xen,reg and xen,path
+     * are present, or if xen,force-assign-without-iommu is specified.
+     */
+    if ( xen_reg != NULL && (xen_path != NULL || xen_force) )
+    {
+        res = handle_passthrough_prop(kinfo, xen_reg, xen_path, xen_force,
+                                      address_cells, size_cells);
+        if ( res < 0 )
+        {
+            printk(XENLOG_ERR "Failed to assign device to %pd\n", kinfo->d);
+            return res;
+        }
+    }
+    else if ( (xen_path && !xen_reg) || (xen_reg && !xen_path && !xen_force) )
+    {
+        printk(XENLOG_ERR "xen,reg or xen,path missing for %pd\n",
+               kinfo->d);
+        return -EINVAL;
+    }
+
+    /* FDT_ERR_NOTFOUND => There is no more properties for this node */
+    return ( propoff != -FDT_ERR_NOTFOUND ) ? propoff : 0;
+}
+
+static int __init scan_pfdt_node(struct kernel_info *kinfo, const void *pfdt,
+                                 int nodeoff,
+                                 uint32_t address_cells, uint32_t size_cells,
+                                 bool scan_passthrough_prop)
+{
+    int rc = 0;
+    void *fdt = kinfo->fdt;
+    int node_next;
+
+    rc = fdt_begin_node(fdt, fdt_get_name(pfdt, nodeoff, NULL));
+    if ( rc )
+        return rc;
+
+    rc = handle_prop_pfdt(kinfo, pfdt, nodeoff, address_cells, size_cells,
+                          scan_passthrough_prop);
+    if ( rc )
+        return rc;
+
+    address_cells = device_tree_get_u32(pfdt, nodeoff, "#address-cells",
+                                        DT_ROOT_NODE_ADDR_CELLS_DEFAULT);
+    size_cells = device_tree_get_u32(pfdt, nodeoff, "#size-cells",
+                                     DT_ROOT_NODE_SIZE_CELLS_DEFAULT);
+
+    node_next = fdt_first_subnode(pfdt, nodeoff);
+    while ( node_next > 0 )
+    {
+        scan_pfdt_node(kinfo, pfdt, node_next, address_cells, size_cells,
+                       scan_passthrough_prop);
+        node_next = fdt_next_subnode(pfdt, node_next);
+    }
+
+    return fdt_end_node(fdt);
+}
+
+static int __init check_partial_fdt(void *pfdt, size_t size)
+{
+    int res;
+
+    if ( fdt_magic(pfdt) != FDT_MAGIC )
+    {
+        dprintk(XENLOG_ERR, "Partial FDT is not a valid Flat Device Tree");
+        return -EINVAL;
+    }
+
+    res = fdt_check_header(pfdt);
+    if ( res )
+    {
+        dprintk(XENLOG_ERR, "Failed to check the partial FDT (%d)", res);
+        return -EINVAL;
+    }
+
+    if ( fdt_totalsize(pfdt) > size )
+    {
+        dprintk(XENLOG_ERR, "Partial FDT totalsize is too big");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int __init domain_handle_dtb_bootmodule(struct domain *d,
+                                               struct kernel_info *kinfo)
+{
+    void *pfdt;
+    int res, node_next;
+
+    pfdt = ioremap_cache(kinfo->dtb_bootmodule->start,
+                         kinfo->dtb_bootmodule->size);
+    if ( pfdt == NULL )
+        return -EFAULT;
+
+    res = check_partial_fdt(pfdt, kinfo->dtb_bootmodule->size);
+    if ( res < 0 )
+        goto out;
+
+    for ( node_next = fdt_first_subnode(pfdt, 0);
+          node_next > 0;
+          node_next = fdt_next_subnode(pfdt, node_next) )
+    {
+        const char *name = fdt_get_name(pfdt, node_next, NULL);
+
+        if ( name == NULL )
+            continue;
+
+        /*
+         * Only scan /gic /aliases /passthrough, ignore the rest.
+         * They don't have to be parsed in order.
+         *
+         * Take the GIC phandle value from the special /gic node in the
+         * DTB fragment.
+         */
+        if ( dt_node_cmp(name, "gic") == 0 )
+        {
+            kinfo->phandle_gic = fdt_get_phandle(pfdt, node_next);
+            continue;
+        }
+
+        if ( dt_node_cmp(name, "aliases") == 0 )
+        {
+            res = scan_pfdt_node(kinfo, pfdt, node_next,
+                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
+                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
+                                 false);
+            if ( res )
+                goto out;
+            continue;
+        }
+        if ( dt_node_cmp(name, "passthrough") == 0 )
+        {
+            res = scan_pfdt_node(kinfo, pfdt, node_next,
+                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
+                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
+                                 true);
+            if ( res )
+                goto out;
+            continue;
+        }
+    }
+
+ out:
+    iounmap(pfdt);
+
+    return res;
+}
+
+/*
+ * The max size for DT is 2MB. However, the generated DT is small (not including
+ * domU passthrough DT nodes whose size we account separately), 4KB are enough
+ * for now, but we might have to increase it in the future.
+ */
+#define DOMU_DTB_SIZE 4096
+static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
+{
+    int addrcells, sizecells;
+    int ret, fdt_size = DOMU_DTB_SIZE;
+
+    kinfo->phandle_gic = GUEST_PHANDLE_GIC;
+    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
+    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
+
+    addrcells = GUEST_ROOT_ADDRESS_CELLS;
+    sizecells = GUEST_ROOT_SIZE_CELLS;
+
+    /* Account for domU passthrough DT size */
+    if ( kinfo->dtb_bootmodule )
+        fdt_size += kinfo->dtb_bootmodule->size;
+
+    /* Cap to max DT size if needed */
+    fdt_size = min(fdt_size, SZ_2M);
+
+    kinfo->fdt = xmalloc_bytes(fdt_size);
+    if ( kinfo->fdt == NULL )
+        return -ENOMEM;
+
+    ret = fdt_create(kinfo->fdt, fdt_size);
+    if ( ret < 0 )
+        goto err;
+
+    ret = fdt_finish_reservemap(kinfo->fdt);
+    if ( ret < 0 )
+        goto err;
+
+    ret = fdt_begin_node(kinfo->fdt, "");
+    if ( ret < 0 )
+        goto err;
+
+    ret = fdt_property_cell(kinfo->fdt, "#address-cells", addrcells);
+    if ( ret )
+        goto err;
+
+    ret = fdt_property_cell(kinfo->fdt, "#size-cells", sizecells);
+    if ( ret )
+        goto err;
+
+    ret = make_chosen_node(kinfo);
+    if ( ret )
+        goto err;
+
+    ret = make_psci_node(kinfo->fdt);
+    if ( ret )
+        goto err;
+
+    ret = make_cpus_node(d, kinfo->fdt);
+    if ( ret )
+        goto err;
+
+    ret = make_memory_node(d, kinfo->fdt, addrcells, sizecells, &kinfo->mem);
+    if ( ret )
+        goto err;
+
+    ret = make_resv_memory_node(d, kinfo->fdt, addrcells, sizecells,
+                                &kinfo->shm_mem);
+    if ( ret )
+        goto err;
+
+    /*
+     * domain_handle_dtb_bootmodule has to be called before the rest of
+     * the device tree is generated because it depends on the value of
+     * the field phandle_gic.
+     */
+    if ( kinfo->dtb_bootmodule )
+    {
+        ret = domain_handle_dtb_bootmodule(d, kinfo);
+        if ( ret )
+            goto err;
+    }
+
+    ret = make_gic_domU_node(kinfo);
+    if ( ret )
+        goto err;
+
+    ret = make_timer_node(kinfo);
+    if ( ret )
+        goto err;
+
+    if ( kinfo->vpl011 )
+    {
+        ret = -EINVAL;
+#ifdef CONFIG_SBSA_VUART_CONSOLE
+        ret = make_vpl011_uart_node(kinfo);
+#endif
+        if ( ret )
+            goto err;
+    }
+
+    if ( kinfo->dom0less_feature & DOM0LESS_ENHANCED_NO_XS )
+    {
+        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);
+        if ( ret )
+            goto err;
+    }
+
+    ret = fdt_end_node(kinfo->fdt);
+    if ( ret < 0 )
+        goto err;
+
+    ret = fdt_finish(kinfo->fdt);
+    if ( ret < 0 )
+        goto err;
+
+    return 0;
+
+  err:
+    printk("Device tree generation failed (%d).\n", ret);
+    xfree(kinfo->fdt);
+
+    return -EINVAL;
+}
+
+static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb,
+                                             unsigned int smp_cpus)
+{
+    /*
+     * Keep in sync with libxl__get_required_paging_memory().
+     * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map,
+     * plus 128 pages to cover extended regions.
+     */
+    unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128);
+
+    BUILD_BUG_ON(PAGE_SIZE != SZ_4K);
+
+    return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT);
+}
+
+static int __init alloc_xenstore_evtchn(struct domain *d)
+{
+    evtchn_alloc_unbound_t alloc;
+    int rc;
+
+    alloc.dom = d->domain_id;
+    alloc.remote_dom = hardware_domain->domain_id;
+    rc = evtchn_alloc_unbound(&alloc, 0);
+    if ( rc )
+    {
+        printk("Failed allocating event channel for domain\n");
+        return rc;
+    }
+
+    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
+
+    return 0;
+}
+
+static int __init construct_domU(struct domain *d,
+                                 const struct dt_device_node *node)
+{
+    struct kernel_info kinfo = {};
+    const char *dom0less_enhanced;
+    int rc;
+    u64 mem;
+    u32 p2m_mem_mb;
+    unsigned long p2m_pages;
+
+    rc = dt_property_read_u64(node, "memory", &mem);
+    if ( !rc )
+    {
+        printk("Error building DomU: cannot read \"memory\" property\n");
+        return -EINVAL;
+    }
+    kinfo.unassigned_mem = (paddr_t)mem * SZ_1K;
+
+    rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb);
+    /* If xen,domain-p2m-mem-mb is not specified, use the default value. */
+    p2m_pages = rc ?
+                p2m_mem_mb << (20 - PAGE_SHIFT) :
+                domain_p2m_pages(mem, d->max_vcpus);
+
+    spin_lock(&d->arch.paging.lock);
+    rc = p2m_set_allocation(d, p2m_pages, NULL);
+    spin_unlock(&d->arch.paging.lock);
+    if ( rc != 0 )
+        return rc;
+
+    printk("*** LOADING DOMU cpus=%u memory=%#"PRIx64"KB ***\n",
+           d->max_vcpus, mem);
+
+    kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
+
+    rc = dt_property_read_string(node, "xen,enhanced", &dom0less_enhanced);
+    if ( rc == -EILSEQ ||
+         rc == -ENODATA ||
+         (rc == 0 && !strcmp(dom0less_enhanced, "enabled")) )
+    {
+        if ( hardware_domain )
+            kinfo.dom0less_feature = DOM0LESS_ENHANCED;
+        else
+            panic("At the moment, Xenstore support requires dom0 to be present\n");
+    }
+    else if ( rc == 0 && !strcmp(dom0less_enhanced, "no-xenstore") )
+        kinfo.dom0less_feature = DOM0LESS_ENHANCED_NO_XS;
+
+    if ( vcpu_create(d, 0) == NULL )
+        return -ENOMEM;
+
+    d->max_pages = ((paddr_t)mem * SZ_1K) >> PAGE_SHIFT;
+
+    kinfo.d = d;
+
+    rc = kernel_probe(&kinfo, node);
+    if ( rc < 0 )
+        return rc;
+
+#ifdef CONFIG_ARM_64
+    /* type must be set before allocate memory */
+    d->arch.type = kinfo.type;
+#endif
+    if ( !dt_find_property(node, "xen,static-mem", NULL) )
+        allocate_memory(d, &kinfo);
+    else if ( !is_domain_direct_mapped(d) )
+        allocate_static_memory(d, &kinfo, node);
+    else
+        assign_static_memory_11(d, &kinfo, node);
+
+#ifdef CONFIG_STATIC_SHM
+    rc = process_shm(d, &kinfo, node);
+    if ( rc < 0 )
+        return rc;
+#endif
+
+    /*
+     * Base address and irq number are needed when creating vpl011 device
+     * tree node in prepare_dtb_domU, so initialization on related variables
+     * shall be done first.
+     */
+    if ( kinfo.vpl011 )
+    {
+        rc = domain_vpl011_init(d, NULL);
+        if ( rc < 0 )
+            return rc;
+    }
+
+    rc = prepare_dtb_domU(d, &kinfo);
+    if ( rc < 0 )
+        return rc;
+
+    rc = construct_domain(d, &kinfo);
+    if ( rc < 0 )
+        return rc;
+
+    if ( kinfo.dom0less_feature & DOM0LESS_XENSTORE )
+    {
+        ASSERT(hardware_domain);
+        rc = alloc_xenstore_evtchn(d);
+        if ( rc < 0 )
+            return rc;
+        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
+    }
+
+    return rc;
+}
+
+void __init create_domUs(void)
+{
+    struct dt_device_node *node;
+    const struct dt_device_node *cpupool_node,
+                                *chosen = dt_find_node_by_path("/chosen");
+
+    BUG_ON(chosen == NULL);
+    dt_for_each_child_node(chosen, node)
+    {
+        struct domain *d;
+        struct xen_domctl_createdomain d_cfg = {
+            .arch.gic_version = XEN_DOMCTL_CONFIG_GIC_NATIVE,
+            .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
+            /*
+             * The default of 1023 should be sufficient for guests because
+             * on ARM we don't bind physical interrupts to event channels.
+             * The only use of the evtchn port is inter-domain communications.
+             * 1023 is also the default value used in libxl.
+             */
+            .max_evtchn_port = 1023,
+            .max_grant_frames = -1,
+            .max_maptrack_frames = -1,
+            .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
+        };
+        unsigned int flags = 0U;
+        uint32_t val;
+        int rc;
+
+        if ( !dt_device_is_compatible(node, "xen,domain") )
+            continue;
+
+        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
+            panic("No more domain IDs available\n");
+
+        if ( dt_find_property(node, "xen,static-mem", NULL) )
+            flags |= CDF_staticmem;
+
+        if ( dt_property_read_bool(node, "direct-map") )
+        {
+            if ( !(flags & CDF_staticmem) )
+                panic("direct-map is not valid for domain %s without static allocation.\n",
+                      dt_node_name(node));
+
+            flags |= CDF_directmap;
+        }
+
+        if ( !dt_property_read_u32(node, "cpus", &d_cfg.max_vcpus) )
+            panic("Missing property 'cpus' for domain %s\n",
+                  dt_node_name(node));
+
+        if ( dt_find_compatible_node(node, NULL, "multiboot,device-tree") &&
+             iommu_enabled )
+            d_cfg.flags |= XEN_DOMCTL_CDF_iommu;
+
+        if ( !dt_property_read_u32(node, "nr_spis", &d_cfg.arch.nr_spis) )
+        {
+            int vpl011_virq = GUEST_VPL011_SPI;
+
+            d_cfg.arch.nr_spis = gic_number_lines() - 32;
+
+            /*
+             * The VPL011 virq is GUEST_VPL011_SPI, unless direct-map is
+             * set, in which case it'll match the hardware.
+             *
+             * Since the domain is not yet created, we can't use
+             * d->arch.vpl011.irq. So the logic to find the vIRQ has to
+             * be hardcoded.
+             * The logic here shall be consistent with the one in
+             * domain_vpl011_init().
+             */
+            if ( flags & CDF_directmap )
+            {
+                vpl011_virq = serial_irq(SERHND_DTUART);
+                if ( vpl011_virq < 0 )
+                    panic("Error getting IRQ number for this serial port %d\n",
+                          SERHND_DTUART);
+            }
+
+            /*
+             * vpl011 uses one emulated SPI. If vpl011 is requested, make
+             * sure that we allocate enough SPIs for it.
+             */
+            if ( dt_property_read_bool(node, "vpl011") )
+                d_cfg.arch.nr_spis = MAX(d_cfg.arch.nr_spis,
+                                         vpl011_virq - 32 + 1);
+        }
+
+        /* Get the optional property domain-cpupool */
+        cpupool_node = dt_parse_phandle(node, "domain-cpupool", 0);
+        if ( cpupool_node )
+        {
+            int pool_id = btcpupools_get_domain_pool_id(cpupool_node);
+            if ( pool_id < 0 )
+                panic("Error getting cpupool id from domain-cpupool (%d)\n",
+                      pool_id);
+            d_cfg.cpupool_id = pool_id;
+        }
+
+        if ( dt_property_read_u32(node, "max_grant_version", &val) )
+            d_cfg.grant_opts = XEN_DOMCTL_GRANT_version(val);
+
+        if ( dt_property_read_u32(node, "max_grant_frames", &val) )
+        {
+            if ( val > INT32_MAX )
+                panic("max_grant_frames (%"PRIu32") overflow\n", val);
+            d_cfg.max_grant_frames = val;
+        }
+
+        if ( dt_property_read_u32(node, "max_maptrack_frames", &val) )
+        {
+            if ( val > INT32_MAX )
+                panic("max_maptrack_frames (%"PRIu32") overflow\n", val);
+            d_cfg.max_maptrack_frames = val;
+        }
+
+        if ( dt_get_property(node, "sve", &val) )
+        {
+#ifdef CONFIG_ARM64_SVE
+            unsigned int sve_vl_bits;
+            bool ret = false;
+
+            if ( !val )
+            {
+                /* Property found with no value, means max HW VL supported */
+                ret = sve_domctl_vl_param(-1, &sve_vl_bits);
+            }
+            else
+            {
+                if ( dt_property_read_u32(node, "sve", &val) )
+                    ret = sve_domctl_vl_param(val, &sve_vl_bits);
+                else
+                    panic("Error reading 'sve' property\n");
+            }
+
+            if ( ret )
+                d_cfg.arch.sve_vl = sve_encode_vl(sve_vl_bits);
+            else
+                panic("SVE vector length error\n");
+#else
+            panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n");
+#endif
+        }
+
+        /*
+         * The variable max_init_domid is initialized with zero, so here it's
+         * very important to use the pre-increment operator to call
+         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
+         */
+        d = domain_create(++max_init_domid, &d_cfg, flags);
+        if ( IS_ERR(d) )
+            panic("Error creating domain %s (rc = %ld)\n",
+                  dt_node_name(node), PTR_ERR(d));
+
+        d->is_console = true;
+        dt_device_set_used_by(node, d->domain_id);
+
+        rc = construct_domU(d, node);
+        if ( rc )
+            panic("Could not set up domain %s (rc = %d)\n",
+                  dt_node_name(node), rc);
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index f69f238ccea4..a0d656b33629 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -28,6 +28,7 @@ 
 #include <asm/setup.h>
 #include <asm/arm64/sve.h>
 #include <asm/cpufeature.h>
+#include <asm/dom0less-build.h>
 #include <asm/domain_build.h>
 #include <xen/event.h>
 
@@ -117,7 +118,7 @@  struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
     return vcpu_create(dom0, 0);
 }
 
-static unsigned int __init get_allocation_size(paddr_t size)
+unsigned int __init get_allocation_size(paddr_t size)
 {
     /*
      * get_order_from_bytes returns the order greater than or equal to
@@ -413,113 +414,6 @@  static void __init allocate_memory_11(struct domain *d,
     }
 }
 
-static bool __init allocate_bank_memory(struct domain *d,
-                                        struct kernel_info *kinfo,
-                                        gfn_t sgfn,
-                                        paddr_t tot_size)
-{
-    int res;
-    struct page_info *pg;
-    struct membank *bank;
-    unsigned int max_order = ~0;
-
-    /*
-     * allocate_bank_memory can be called with a tot_size of zero for
-     * the second memory bank. It is not an error and we can safely
-     * avoid creating a zero-size memory bank.
-     */
-    if ( tot_size == 0 )
-        return true;
-
-    bank = &kinfo->mem.bank[kinfo->mem.nr_banks];
-    bank->start = gfn_to_gaddr(sgfn);
-    bank->size = tot_size;
-
-    while ( tot_size > 0 )
-    {
-        unsigned int order = get_allocation_size(tot_size);
-
-        order = min(max_order, order);
-
-        pg = alloc_domheap_pages(d, order, 0);
-        if ( !pg )
-        {
-            /*
-             * If we can't allocate one page, then it is unlikely to
-             * succeed in the next iteration. So bail out.
-             */
-            if ( !order )
-                return false;
-
-            /*
-             * If we can't allocate memory with order, then it is
-             * unlikely to succeed in the next iteration.
-             * Record the order - 1 to avoid re-trying.
-             */
-            max_order = order - 1;
-            continue;
-        }
-
-        res = guest_physmap_add_page(d, sgfn, page_to_mfn(pg), order);
-        if ( res )
-        {
-            dprintk(XENLOG_ERR, "Failed map pages to DOMU: %d", res);
-            return false;
-        }
-
-        sgfn = gfn_add(sgfn, 1UL << order);
-        tot_size -= (1ULL << (PAGE_SHIFT + order));
-    }
-
-    kinfo->mem.nr_banks++;
-    kinfo->unassigned_mem -= bank->size;
-
-    return true;
-}
-
-static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo)
-{
-    unsigned int i;
-    paddr_t bank_size;
-
-    printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n",
-           /* Don't want format this as PRIpaddr (16 digit hex) */
-           (unsigned long)(kinfo->unassigned_mem >> 20), d);
-
-    kinfo->mem.nr_banks = 0;
-    bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem);
-    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM0_BASE),
-                               bank_size) )
-        goto fail;
-
-    bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem);
-    if ( !allocate_bank_memory(d, kinfo, gaddr_to_gfn(GUEST_RAM1_BASE),
-                               bank_size) )
-        goto fail;
-
-    if ( kinfo->unassigned_mem )
-        goto fail;
-
-    for( i = 0; i < kinfo->mem.nr_banks; i++ )
-    {
-        printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
-               d,
-               i,
-               kinfo->mem.bank[i].start,
-               kinfo->mem.bank[i].start + kinfo->mem.bank[i].size,
-               /* Don't want format this as PRIpaddr (16 digit hex) */
-               (unsigned long)(kinfo->mem.bank[i].size >> 20));
-    }
-
-    return;
-
-fail:
-    panic("Failed to allocate requested domain memory."
-          /* Don't want format this as PRIpaddr (16 digit hex) */
-          " %ldKB unallocated. Fix the VMs configurations.\n",
-          (unsigned long)kinfo->unassigned_mem >> 10);
-}
-
 #ifdef CONFIG_STATIC_MEMORY
 static bool __init append_static_memory_to_bank(struct domain *d,
                                                 struct membank *bank,
@@ -599,9 +493,8 @@  static int __init parse_static_mem_prop(const struct dt_device_node *node,
 }
 
 /* Allocate memory from static memory as RAM for one specific domain d. */
-static void __init allocate_static_memory(struct domain *d,
-                                          struct kernel_info *kinfo,
-                                          const struct dt_device_node *node)
+void __init allocate_static_memory(struct domain *d, struct kernel_info *kinfo,
+                                   const struct dt_device_node *node)
 {
     u32 addr_cells, size_cells, reg_cells;
     unsigned int nr_banks, gbank, bank = 0;
@@ -705,9 +598,8 @@  static void __init allocate_static_memory(struct domain *d,
  * The static memory will be directly mapped in the guest(Guest Physical
  * Address == Physical Address).
  */
-static void __init assign_static_memory_11(struct domain *d,
-                                           struct kernel_info *kinfo,
-                                           const struct dt_device_node *node)
+void __init assign_static_memory_11(struct domain *d, struct kernel_info *kinfo,
+                                    const struct dt_device_node *node)
 {
     u32 addr_cells, size_cells, reg_cells;
     unsigned int nr_banks, bank = 0;
@@ -941,8 +833,8 @@  static int __init append_shm_bank_to_domain(struct kernel_info *kinfo,
     return 0;
 }
 
-static int __init process_shm(struct domain *d, struct kernel_info *kinfo,
-                              const struct dt_device_node *node)
+int __init process_shm(struct domain *d, struct kernel_info *kinfo,
+                       const struct dt_device_node *node)
 {
     struct dt_device_node *shm_node;
 
@@ -1047,20 +939,6 @@  static int __init process_shm(struct domain *d, struct kernel_info *kinfo,
     return 0;
 }
 #endif /* CONFIG_STATIC_SHM */
-#else
-static void __init allocate_static_memory(struct domain *d,
-                                          struct kernel_info *kinfo,
-                                          const struct dt_device_node *node)
-{
-    ASSERT_UNREACHABLE();
-}
-
-static void __init assign_static_memory_11(struct domain *d,
-                                           struct kernel_info *kinfo,
-                                           const struct dt_device_node *node)
-{
-    ASSERT_UNREACHABLE();
-}
 #endif
 
 /*
@@ -1264,17 +1142,10 @@  static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
     return 0;
 }
 
-/*
- * Helper to write an interrupts with the GIC format
- * This code is assuming the irq is an PPI.
- */
-
-typedef __be32 gic_interrupt_t[3];
-
-static void __init set_interrupt(gic_interrupt_t interrupt,
-                                 unsigned int irq,
-                                 unsigned int cpumask,
-                                 unsigned int level)
+void __init set_interrupt(gic_interrupt_t interrupt,
+                          unsigned int irq,
+                          unsigned int cpumask,
+                          unsigned int level)
 {
     __be32 *cells = interrupt;
     bool is_ppi = !!(irq < 32);
@@ -1319,8 +1190,7 @@  static int __init fdt_property_interrupts(const struct kernel_info *kinfo,
  * unit (which contains the physical address) with name to generate a
  * node name.
  */
-static int __init domain_fdt_begin_node(void *fdt, const char *name,
-                                        uint64_t unit)
+int __init domain_fdt_begin_node(void *fdt, const char *name, uint64_t unit)
 {
     /*
      * The size of the buffer to hold the longest possible string (i.e.
@@ -1344,10 +1214,10 @@  static int __init domain_fdt_begin_node(void *fdt, const char *name,
     return fdt_begin_node(fdt, buf);
 }
 
-static int __init make_memory_node(const struct domain *d,
-                                   void *fdt,
-                                   int addrcells, int sizecells,
-                                   struct meminfo *mem)
+int __init make_memory_node(const struct domain *d,
+                            void *fdt,
+                            int addrcells, int sizecells,
+                            struct meminfo *mem)
 {
     unsigned int i;
     int res, reg_size = addrcells + sizecells;
@@ -1483,10 +1353,10 @@  static int __init make_shm_memory_node(const struct domain *d,
 }
 #endif
 
-static int __init make_resv_memory_node(const struct domain *d,
-                                        void *fdt,
-                                        int addrcells, int sizecells,
-                                        const struct meminfo *mem)
+int __init make_resv_memory_node(const struct domain *d,
+                                 void *fdt,
+                                 int addrcells, int sizecells,
+                                 const struct meminfo *mem)
 {
     int res = 0;
     /* Placeholder for reserved-memory\0 */
@@ -1819,9 +1689,9 @@  static int __init find_domU_holes(const struct kernel_info *kinfo,
     return res;
 }
 
-static int __init make_hypervisor_node(struct domain *d,
-                                       const struct kernel_info *kinfo,
-                                       int addrcells, int sizecells)
+int __init make_hypervisor_node(struct domain *d,
+                                const struct kernel_info *kinfo,
+                                int addrcells, int sizecells)
 {
     const char compat[] =
         "xen,xen-" XEN_VERSION_STRING "\0"
@@ -1938,7 +1808,7 @@  static int __init make_hypervisor_node(struct domain *d,
     return res;
 }
 
-static int __init make_psci_node(void *fdt)
+int __init make_psci_node(void *fdt)
 {
     int res;
     const char compat[] =
@@ -1974,7 +1844,7 @@  static int __init make_psci_node(void *fdt)
     return res;
 }
 
-static int __init make_cpus_node(const struct domain *d, void *fdt)
+int __init make_cpus_node(const struct domain *d, void *fdt)
 {
     int res;
     const struct dt_device_node *cpus = dt_find_node_by_path("/cpus");
@@ -2172,7 +2042,7 @@  static int __init make_gic_node(const struct domain *d, void *fdt,
     return res;
 }
 
-static int __init make_timer_node(const struct kernel_info *kinfo)
+int __init make_timer_node(const struct kernel_info *kinfo)
 {
     void *fdt = kinfo->fdt;
     static const struct dt_device_match timer_ids[] __initconst =
@@ -2490,743 +2360,134 @@  static int __init handle_node(struct domain *d, struct kernel_info *kinfo,
     return res;
 }
 
-#ifdef CONFIG_VGICV2
-static int __init make_gicv2_domU_node(struct kernel_info *kinfo)
+static int __init prepare_dtb_hwdom(struct domain *d, struct kernel_info *kinfo)
 {
-    void *fdt = kinfo->fdt;
-    int res = 0;
-    __be32 reg[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) * 2];
-    __be32 *cells;
-    const struct domain *d = kinfo->d;
-
-    res = domain_fdt_begin_node(fdt, "interrupt-controller",
-                                vgic_dist_base(&d->arch.vgic));
-    if ( res )
-        return res;
-
-    res = fdt_property_cell(fdt, "#address-cells", 0);
-    if ( res )
-        return res;
+    const p2m_type_t default_p2mt = p2m_mmio_direct_c;
+    const void *fdt;
+    int new_size;
+    int ret;
 
-    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
-    if ( res )
-        return res;
+    ASSERT(dt_host && (dt_host->sibling == NULL));
 
-    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
-    if ( res )
-        return res;
+    kinfo->phandle_gic = dt_interrupt_controller->phandle;
+    fdt = device_tree_flattened;
 
-    res = fdt_property_string(fdt, "compatible", "arm,gic-400");
-    if ( res )
-        return res;
+    new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
+    kinfo->fdt = xmalloc_bytes(new_size);
+    if ( kinfo->fdt == NULL )
+        return -ENOMEM;
 
-    cells = &reg[0];
-    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
-                       vgic_dist_base(&d->arch.vgic), GUEST_GICD_SIZE);
-    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
-                       vgic_cpu_base(&d->arch.vgic), GUEST_GICC_SIZE);
+    ret = fdt_create(kinfo->fdt, new_size);
+    if ( ret < 0 )
+        goto err;
 
-    res = fdt_property(fdt, "reg", reg, sizeof(reg));
-    if (res)
-        return res;
+    fdt_finish_reservemap(kinfo->fdt);
 
-    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
-    if (res)
-        return res;
+    ret = handle_node(d, kinfo, dt_host, default_p2mt);
+    if ( ret )
+        goto err;
 
-    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
-    if (res)
-        return res;
+    ret = fdt_finish(kinfo->fdt);
+    if ( ret < 0 )
+        goto err;
 
-    res = fdt_end_node(fdt);
+    return 0;
 
-    return res;
+  err:
+    printk("Device tree generation failed (%d).\n", ret);
+    xfree(kinfo->fdt);
+    return -EINVAL;
 }
-#endif
 
-#ifdef CONFIG_GICV3
-static int __init make_gicv3_domU_node(struct kernel_info *kinfo)
+static void __init dtb_load(struct kernel_info *kinfo)
 {
-    void *fdt = kinfo->fdt;
-    int res = 0;
-    __be32 *reg, *cells;
-    const struct domain *d = kinfo->d;
-    unsigned int i, len = 0;
-
-    res = domain_fdt_begin_node(fdt, "interrupt-controller",
-                                vgic_dist_base(&d->arch.vgic));
-    if ( res )
-        return res;
-
-    res = fdt_property_cell(fdt, "#address-cells", 0);
-    if ( res )
-        return res;
-
-    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
-    if ( res )
-        return res;
-
-    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
-    if ( res )
-        return res;
-
-    res = fdt_property_string(fdt, "compatible", "arm,gic-v3");
-    if ( res )
-        return res;
-
-    /* reg specifies all re-distributors and Distributor. */
-    len = (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
-          (d->arch.vgic.nr_regions + 1) * sizeof(__be32);
-    reg = xmalloc_bytes(len);
-    if ( reg == NULL )
-        return -ENOMEM;
-    cells = reg;
-
-    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
-                       vgic_dist_base(&d->arch.vgic), GUEST_GICV3_GICD_SIZE);
-
-    for ( i = 0; i < d->arch.vgic.nr_regions; i++ )
-        dt_child_set_range(&cells,
-                           GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
-                           d->arch.vgic.rdist_regions[i].base,
-                           d->arch.vgic.rdist_regions[i].size);
-
-    res = fdt_property(fdt, "reg", reg, len);
-    xfree(reg);
-    if (res)
-        return res;
-
-    res = fdt_property_cell(fdt, "linux,phandle", kinfo->phandle_gic);
-    if (res)
-        return res;
-
-    res = fdt_property_cell(fdt, "phandle", kinfo->phandle_gic);
-    if (res)
-        return res;
+    unsigned long left;
 
-    res = fdt_end_node(fdt);
+    printk("Loading %pd DTB to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
+           kinfo->d, kinfo->dtb_paddr,
+           kinfo->dtb_paddr + fdt_totalsize(kinfo->fdt));
 
-    return res;
-}
-#endif
+    left = copy_to_guest_phys_flush_dcache(kinfo->d, kinfo->dtb_paddr,
+                                           kinfo->fdt,
+                                           fdt_totalsize(kinfo->fdt));
 
-static int __init make_gic_domU_node(struct kernel_info *kinfo)
-{
-    switch ( kinfo->d->arch.vgic.version )
-    {
-#ifdef CONFIG_GICV3
-    case GIC_V3:
-        return make_gicv3_domU_node(kinfo);
-#endif
-#ifdef CONFIG_VGICV2
-    case GIC_V2:
-        return make_gicv2_domU_node(kinfo);
-#endif
-    default:
-        panic("Unsupported GIC version\n");
-    }
+    if ( left != 0 )
+        panic("Unable to copy the DTB to %pd memory (left = %lu bytes)\n",
+              kinfo->d, left);
+    xfree(kinfo->fdt);
 }
 
-#ifdef CONFIG_SBSA_VUART_CONSOLE
-static int __init make_vpl011_uart_node(struct kernel_info *kinfo)
+static void __init initrd_load(struct kernel_info *kinfo)
 {
-    void *fdt = kinfo->fdt;
+    const struct bootmodule *mod = kinfo->initrd_bootmodule;
+    paddr_t load_addr = kinfo->initrd_paddr;
+    paddr_t paddr, len;
+    int node;
     int res;
-    gic_interrupt_t intr;
-    __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
-    __be32 *cells;
-    struct domain *d = kinfo->d;
-
-    res = domain_fdt_begin_node(fdt, "sbsa-uart", d->arch.vpl011.base_addr);
-    if ( res )
-        return res;
+    __be32 val[2];
+    __be32 *cellp;
+    void __iomem *initrd;
 
-    res = fdt_property_string(fdt, "compatible", "arm,sbsa-uart");
-    if ( res )
-        return res;
+    if ( !mod || !mod->size )
+        return;
 
-    cells = &reg[0];
-    dt_child_set_range(&cells, GUEST_ROOT_ADDRESS_CELLS,
-                       GUEST_ROOT_SIZE_CELLS, d->arch.vpl011.base_addr,
-                       GUEST_PL011_SIZE);
+    paddr = mod->start;
+    len = mod->size;
 
-    res = fdt_property(fdt, "reg", reg, sizeof(reg));
-    if ( res )
-        return res;
+    printk("Loading %pd initrd from %"PRIpaddr" to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
+           kinfo->d, paddr, load_addr, load_addr + len);
 
-    set_interrupt(intr, d->arch.vpl011.virq, 0xf, DT_IRQ_TYPE_LEVEL_HIGH);
+    /* Fix up linux,initrd-start and linux,initrd-end in /chosen */
+    node = fdt_path_offset(kinfo->fdt, "/chosen");
+    if ( node < 0 )
+        panic("Cannot find the /chosen node\n");
 
-    res = fdt_property(fdt, "interrupts", intr, sizeof (intr));
+    cellp = (__be32 *)val;
+    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr);
+    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-start",
+                              val, sizeof(val));
     if ( res )
-        return res;
+        panic("Cannot fix up \"linux,initrd-start\" property\n");
 
-    res = fdt_property_cell(fdt, "interrupt-parent",
-                            kinfo->phandle_gic);
+    cellp = (__be32 *)val;
+    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr + len);
+    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-end",
+                              val, sizeof(val));
     if ( res )
-        return res;
+        panic("Cannot fix up \"linux,initrd-end\" property\n");
 
-    /* Use a default baud rate of 115200. */
-    fdt_property_u32(fdt, "current-speed", 115200);
+    initrd = ioremap_wc(paddr, len);
+    if ( !initrd )
+        panic("Unable to map the hwdom initrd\n");
 
-    res = fdt_end_node(fdt);
-    if ( res )
-        return res;
+    res = copy_to_guest_phys_flush_dcache(kinfo->d, load_addr,
+                                          initrd, len);
+    if ( res != 0 )
+        panic("Unable to copy the initrd in the hwdom memory\n");
 
-    return 0;
+    iounmap(initrd);
 }
-#endif
 
 /*
- * Scan device tree properties for passthrough specific information.
- * Returns < 0 on error
- *         0 on success
+ * Allocate the event channel PPIs and setup the HVM_PARAM_CALLBACK_IRQ.
+ * The allocated IRQ will be found in d->arch.evtchn_irq.
+ *
+ * Note that this should only be called once all PPIs used by the
+ * hardware domain have been registered.
  */
-static int __init handle_passthrough_prop(struct kernel_info *kinfo,
-                                          const struct fdt_property *xen_reg,
-                                          const struct fdt_property *xen_path,
-                                          bool xen_force,
-                                          uint32_t address_cells, uint32_t size_cells)
+void __init evtchn_allocate(struct domain *d)
 {
-    const __be32 *cell;
-    unsigned int i, len;
-    struct dt_device_node *node;
     int res;
-    paddr_t mstart, size, gstart;
-
-    /* xen,reg specifies where to map the MMIO region */
-    cell = (const __be32 *)xen_reg->data;
-    len = fdt32_to_cpu(xen_reg->len) / ((address_cells * 2 + size_cells) *
-                                        sizeof(uint32_t));
+    u64 val;
 
-    for ( i = 0; i < len; i++ )
-    {
-        device_tree_get_reg(&cell, address_cells, size_cells,
-                            &mstart, &size);
-        gstart = dt_next_cell(address_cells, &cell);
+    res = vgic_allocate_ppi(d);
+    if ( res < 0 )
+        panic("Unable to allocate a PPI for the event channel interrupt\n");
 
-        if ( gstart & ~PAGE_MASK || mstart & ~PAGE_MASK || size & ~PAGE_MASK )
-        {
-            printk(XENLOG_ERR
-                    "DomU passthrough config has not page aligned addresses/sizes\n");
-            return -EINVAL;
-        }
+    d->arch.evtchn_irq = res;
 
-        res = iomem_permit_access(kinfo->d, paddr_to_pfn(mstart),
-                                  paddr_to_pfn(PAGE_ALIGN(mstart + size - 1)));
-        if ( res )
-        {
-            printk(XENLOG_ERR "Unable to permit to dom%d access to"
-                   " 0x%"PRIpaddr" - 0x%"PRIpaddr"\n",
-                   kinfo->d->domain_id,
-                   mstart & PAGE_MASK, PAGE_ALIGN(mstart + size) - 1);
-            return res;
-        }
-
-        res = map_regions_p2mt(kinfo->d,
-                               gaddr_to_gfn(gstart),
-                               PFN_DOWN(size),
-                               maddr_to_mfn(mstart),
-                               p2m_mmio_direct_dev);
-        if ( res < 0 )
-        {
-            printk(XENLOG_ERR
-                   "Failed to map %"PRIpaddr" to the guest at%"PRIpaddr"\n",
-                   mstart, gstart);
-            return -EFAULT;
-        }
-    }
-
-    /*
-     * If xen_force, we let the user assign a MMIO region with no
-     * associated path.
-     */
-    if ( xen_path == NULL )
-        return xen_force ? 0 : -EINVAL;
-
-    /*
-     * xen,path specifies the corresponding node in the host DT.
-     * Both interrupt mappings and IOMMU settings are based on it,
-     * as they are done based on the corresponding host DT node.
-     */
-    node = dt_find_node_by_path(xen_path->data);
-    if ( node == NULL )
-    {
-        printk(XENLOG_ERR "Couldn't find node %s in host_dt!\n",
-               (char *)xen_path->data);
-        return -EINVAL;
-    }
-
-    res = map_device_irqs_to_domain(kinfo->d, node, true, NULL);
-    if ( res < 0 )
-        return res;
-
-    res = iommu_add_dt_device(node);
-    if ( res < 0 )
-        return res;
-
-    /* If xen_force, we allow assignment of devices without IOMMU protection. */
-    if ( xen_force && !dt_device_is_protected(node) )
-        return 0;
-
-    return iommu_assign_dt_device(kinfo->d, node);
-}
-
-static int __init handle_prop_pfdt(struct kernel_info *kinfo,
-                                   const void *pfdt, int nodeoff,
-                                   uint32_t address_cells, uint32_t size_cells,
-                                   bool scan_passthrough_prop)
-{
-    void *fdt = kinfo->fdt;
-    int propoff, nameoff, res;
-    const struct fdt_property *prop, *xen_reg = NULL, *xen_path = NULL;
-    const char *name;
-    bool found, xen_force = false;
-
-    for ( propoff = fdt_first_property_offset(pfdt, nodeoff);
-          propoff >= 0;
-          propoff = fdt_next_property_offset(pfdt, propoff) )
-    {
-        if ( !(prop = fdt_get_property_by_offset(pfdt, propoff, NULL)) )
-            return -FDT_ERR_INTERNAL;
-
-        found = false;
-        nameoff = fdt32_to_cpu(prop->nameoff);
-        name = fdt_string(pfdt, nameoff);
-
-        if ( scan_passthrough_prop )
-        {
-            if ( dt_prop_cmp("xen,reg", name) == 0 )
-            {
-                xen_reg = prop;
-                found = true;
-            }
-            else if ( dt_prop_cmp("xen,path", name) == 0 )
-            {
-                xen_path = prop;
-                found = true;
-            }
-            else if ( dt_prop_cmp("xen,force-assign-without-iommu",
-                                  name) == 0 )
-            {
-                xen_force = true;
-                found = true;
-            }
-        }
-
-        /*
-         * Copy properties other than the ones above: xen,reg, xen,path,
-         * and xen,force-assign-without-iommu.
-         */
-        if ( !found )
-        {
-            res = fdt_property(fdt, name, prop->data, fdt32_to_cpu(prop->len));
-            if ( res )
-                return res;
-        }
-    }
-
-    /*
-     * Only handle passthrough properties if both xen,reg and xen,path
-     * are present, or if xen,force-assign-without-iommu is specified.
-     */
-    if ( xen_reg != NULL && (xen_path != NULL || xen_force) )
-    {
-        res = handle_passthrough_prop(kinfo, xen_reg, xen_path, xen_force,
-                                      address_cells, size_cells);
-        if ( res < 0 )
-        {
-            printk(XENLOG_ERR "Failed to assign device to %pd\n", kinfo->d);
-            return res;
-        }
-    }
-    else if ( (xen_path && !xen_reg) || (xen_reg && !xen_path && !xen_force) )
-    {
-        printk(XENLOG_ERR "xen,reg or xen,path missing for %pd\n",
-               kinfo->d);
-        return -EINVAL;
-    }
-
-    /* FDT_ERR_NOTFOUND => There is no more properties for this node */
-    return ( propoff != -FDT_ERR_NOTFOUND ) ? propoff : 0;
-}
-
-static int __init scan_pfdt_node(struct kernel_info *kinfo, const void *pfdt,
-                                 int nodeoff,
-                                 uint32_t address_cells, uint32_t size_cells,
-                                 bool scan_passthrough_prop)
-{
-    int rc = 0;
-    void *fdt = kinfo->fdt;
-    int node_next;
-
-    rc = fdt_begin_node(fdt, fdt_get_name(pfdt, nodeoff, NULL));
-    if ( rc )
-        return rc;
-
-    rc = handle_prop_pfdt(kinfo, pfdt, nodeoff, address_cells, size_cells,
-                          scan_passthrough_prop);
-    if ( rc )
-        return rc;
-
-    address_cells = device_tree_get_u32(pfdt, nodeoff, "#address-cells",
-                                        DT_ROOT_NODE_ADDR_CELLS_DEFAULT);
-    size_cells = device_tree_get_u32(pfdt, nodeoff, "#size-cells",
-                                     DT_ROOT_NODE_SIZE_CELLS_DEFAULT);
-
-    node_next = fdt_first_subnode(pfdt, nodeoff);
-    while ( node_next > 0 )
-    {
-        scan_pfdt_node(kinfo, pfdt, node_next, address_cells, size_cells,
-                       scan_passthrough_prop);
-        node_next = fdt_next_subnode(pfdt, node_next);
-    }
-
-    return fdt_end_node(fdt);
-}
-
-static int __init check_partial_fdt(void *pfdt, size_t size)
-{
-    int res;
-
-    if ( fdt_magic(pfdt) != FDT_MAGIC )
-    {
-        dprintk(XENLOG_ERR, "Partial FDT is not a valid Flat Device Tree");
-        return -EINVAL;
-    }
-
-    res = fdt_check_header(pfdt);
-    if ( res )
-    {
-        dprintk(XENLOG_ERR, "Failed to check the partial FDT (%d)", res);
-        return -EINVAL;
-    }
-
-    if ( fdt_totalsize(pfdt) > size )
-    {
-        dprintk(XENLOG_ERR, "Partial FDT totalsize is too big");
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
-static int __init domain_handle_dtb_bootmodule(struct domain *d,
-                                               struct kernel_info *kinfo)
-{
-    void *pfdt;
-    int res, node_next;
-
-    pfdt = ioremap_cache(kinfo->dtb_bootmodule->start,
-                         kinfo->dtb_bootmodule->size);
-    if ( pfdt == NULL )
-        return -EFAULT;
-
-    res = check_partial_fdt(pfdt, kinfo->dtb_bootmodule->size);
-    if ( res < 0 )
-        goto out;
-
-    for ( node_next = fdt_first_subnode(pfdt, 0); 
-          node_next > 0;
-          node_next = fdt_next_subnode(pfdt, node_next) )
-    {
-        const char *name = fdt_get_name(pfdt, node_next, NULL);
-
-        if ( name == NULL )
-            continue;
-
-        /*
-         * Only scan /gic /aliases /passthrough, ignore the rest.
-         * They don't have to be parsed in order.
-         *
-         * Take the GIC phandle value from the special /gic node in the
-         * DTB fragment.
-         */
-        if ( dt_node_cmp(name, "gic") == 0 )
-        {
-            kinfo->phandle_gic = fdt_get_phandle(pfdt, node_next);
-            continue;
-        }
-
-        if ( dt_node_cmp(name, "aliases") == 0 )
-        {
-            res = scan_pfdt_node(kinfo, pfdt, node_next,
-                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
-                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
-                                 false);
-            if ( res )
-                goto out;
-            continue;
-        }
-        if ( dt_node_cmp(name, "passthrough") == 0 )
-        {
-            res = scan_pfdt_node(kinfo, pfdt, node_next,
-                                 DT_ROOT_NODE_ADDR_CELLS_DEFAULT,
-                                 DT_ROOT_NODE_SIZE_CELLS_DEFAULT,
-                                 true);
-            if ( res )
-                goto out;
-            continue;
-        }
-    }
-
- out:
-    iounmap(pfdt);
-
-    return res;
-}
-
-/*
- * The max size for DT is 2MB. However, the generated DT is small (not including
- * domU passthrough DT nodes whose size we account separately), 4KB are enough
- * for now, but we might have to increase it in the future.
- */
-#define DOMU_DTB_SIZE 4096
-static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
-{
-    int addrcells, sizecells;
-    int ret, fdt_size = DOMU_DTB_SIZE;
-
-    kinfo->phandle_gic = GUEST_PHANDLE_GIC;
-    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
-    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
-
-    addrcells = GUEST_ROOT_ADDRESS_CELLS;
-    sizecells = GUEST_ROOT_SIZE_CELLS;
-
-    /* Account for domU passthrough DT size */
-    if ( kinfo->dtb_bootmodule )
-        fdt_size += kinfo->dtb_bootmodule->size;
-
-    /* Cap to max DT size if needed */
-    fdt_size = min(fdt_size, SZ_2M);
-
-    kinfo->fdt = xmalloc_bytes(fdt_size);
-    if ( kinfo->fdt == NULL )
-        return -ENOMEM;
-
-    ret = fdt_create(kinfo->fdt, fdt_size);
-    if ( ret < 0 )
-        goto err;
-
-    ret = fdt_finish_reservemap(kinfo->fdt);
-    if ( ret < 0 )
-        goto err;
-
-    ret = fdt_begin_node(kinfo->fdt, "");
-    if ( ret < 0 )
-        goto err;
-
-    ret = fdt_property_cell(kinfo->fdt, "#address-cells", addrcells);
-    if ( ret )
-        goto err;
-
-    ret = fdt_property_cell(kinfo->fdt, "#size-cells", sizecells);
-    if ( ret )
-        goto err;
-
-    ret = make_chosen_node(kinfo);
-    if ( ret )
-        goto err;
-
-    ret = make_psci_node(kinfo->fdt);
-    if ( ret )
-        goto err;
-
-    ret = make_cpus_node(d, kinfo->fdt);
-    if ( ret )
-        goto err;
-
-    ret = make_memory_node(d, kinfo->fdt, addrcells, sizecells, &kinfo->mem);
-    if ( ret )
-        goto err;
-
-    ret = make_resv_memory_node(d, kinfo->fdt, addrcells, sizecells,
-                                &kinfo->shm_mem);
-    if ( ret )
-        goto err;
-
-    /*
-     * domain_handle_dtb_bootmodule has to be called before the rest of
-     * the device tree is generated because it depends on the value of
-     * the field phandle_gic.
-     */
-    if ( kinfo->dtb_bootmodule )
-    {
-        ret = domain_handle_dtb_bootmodule(d, kinfo);
-        if ( ret )
-            goto err;
-    }
-
-    ret = make_gic_domU_node(kinfo);
-    if ( ret )
-        goto err;
-
-    ret = make_timer_node(kinfo);
-    if ( ret )
-        goto err;
-
-    if ( kinfo->vpl011 )
-    {
-        ret = -EINVAL;
-#ifdef CONFIG_SBSA_VUART_CONSOLE
-        ret = make_vpl011_uart_node(kinfo);
-#endif
-        if ( ret )
-            goto err;
-    }
-
-    if ( kinfo->dom0less_feature & DOM0LESS_ENHANCED_NO_XS )
-    {
-        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);
-        if ( ret )
-            goto err;
-    }
-
-    ret = fdt_end_node(kinfo->fdt);
-    if ( ret < 0 )
-        goto err;
-
-    ret = fdt_finish(kinfo->fdt);
-    if ( ret < 0 )
-        goto err;
-
-    return 0;
-
-  err:
-    printk("Device tree generation failed (%d).\n", ret);
-    xfree(kinfo->fdt);
-
-    return -EINVAL;
-}
-
-static int __init prepare_dtb_hwdom(struct domain *d, struct kernel_info *kinfo)
-{
-    const p2m_type_t default_p2mt = p2m_mmio_direct_c;
-    const void *fdt;
-    int new_size;
-    int ret;
-
-    ASSERT(dt_host && (dt_host->sibling == NULL));
-
-    kinfo->phandle_gic = dt_interrupt_controller->phandle;
-    fdt = device_tree_flattened;
-
-    new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
-    kinfo->fdt = xmalloc_bytes(new_size);
-    if ( kinfo->fdt == NULL )
-        return -ENOMEM;
-
-    ret = fdt_create(kinfo->fdt, new_size);
-    if ( ret < 0 )
-        goto err;
-
-    fdt_finish_reservemap(kinfo->fdt);
-
-    ret = handle_node(d, kinfo, dt_host, default_p2mt);
-    if ( ret )
-        goto err;
-
-    ret = fdt_finish(kinfo->fdt);
-    if ( ret < 0 )
-        goto err;
-
-    return 0;
-
-  err:
-    printk("Device tree generation failed (%d).\n", ret);
-    xfree(kinfo->fdt);
-    return -EINVAL;
-}
-
-static void __init dtb_load(struct kernel_info *kinfo)
-{
-    unsigned long left;
-
-    printk("Loading %pd DTB to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
-           kinfo->d, kinfo->dtb_paddr,
-           kinfo->dtb_paddr + fdt_totalsize(kinfo->fdt));
-
-    left = copy_to_guest_phys_flush_dcache(kinfo->d, kinfo->dtb_paddr,
-                                           kinfo->fdt,
-                                           fdt_totalsize(kinfo->fdt));
-
-    if ( left != 0 )
-        panic("Unable to copy the DTB to %pd memory (left = %lu bytes)\n",
-              kinfo->d, left);
-    xfree(kinfo->fdt);
-}
-
-static void __init initrd_load(struct kernel_info *kinfo)
-{
-    const struct bootmodule *mod = kinfo->initrd_bootmodule;
-    paddr_t load_addr = kinfo->initrd_paddr;
-    paddr_t paddr, len;
-    int node;
-    int res;
-    __be32 val[2];
-    __be32 *cellp;
-    void __iomem *initrd;
-
-    if ( !mod || !mod->size )
-        return;
-
-    paddr = mod->start;
-    len = mod->size;
-
-    printk("Loading %pd initrd from %"PRIpaddr" to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
-           kinfo->d, paddr, load_addr, load_addr + len);
-
-    /* Fix up linux,initrd-start and linux,initrd-end in /chosen */
-    node = fdt_path_offset(kinfo->fdt, "/chosen");
-    if ( node < 0 )
-        panic("Cannot find the /chosen node\n");
-
-    cellp = (__be32 *)val;
-    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr);
-    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-start",
-                              val, sizeof(val));
-    if ( res )
-        panic("Cannot fix up \"linux,initrd-start\" property\n");
-
-    cellp = (__be32 *)val;
-    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr + len);
-    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-end",
-                              val, sizeof(val));
-    if ( res )
-        panic("Cannot fix up \"linux,initrd-end\" property\n");
-
-    initrd = ioremap_wc(paddr, len);
-    if ( !initrd )
-        panic("Unable to map the hwdom initrd\n");
-
-    res = copy_to_guest_phys_flush_dcache(kinfo->d, load_addr,
-                                          initrd, len);
-    if ( res != 0 )
-        panic("Unable to copy the initrd in the hwdom memory\n");
-
-    iounmap(initrd);
-}
-
-/*
- * Allocate the event channel PPIs and setup the HVM_PARAM_CALLBACK_IRQ.
- * The allocated IRQ will be found in d->arch.evtchn_irq.
- *
- * Note that this should only be called once all PPIs used by the
- * hardware domain have been registered.
- */
-void __init evtchn_allocate(struct domain *d)
-{
-    int res;
-    u64 val;
-
-    res = vgic_allocate_ppi(d);
-    if ( res < 0 )
-        panic("Unable to allocate a PPI for the event channel interrupt\n");
-
-    d->arch.evtchn_irq = res;
-
-    printk("Allocating PPI %u for event channel interrupt\n",
-           d->arch.evtchn_irq);
+    printk("Allocating PPI %u for event channel interrupt\n",
+           d->arch.evtchn_irq);
 
     /* Set the value of domain param HVM_PARAM_CALLBACK_IRQ */
     val = MASK_INSR(HVM_PARAM_CALLBACK_TYPE_PPI,
@@ -3409,22 +2670,7 @@  static void __init find_gnttab_region(struct domain *d,
            kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size);
 }
 
-static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb,
-                                             unsigned int smp_cpus)
-{
-    /*
-     * Keep in sync with libxl__get_required_paging_memory().
-     * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map,
-     * plus 128 pages to cover extended regions.
-     */
-    unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128);
-
-    BUILD_BUG_ON(PAGE_SIZE != SZ_4K);
-
-    return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT);
-}
-
-static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
+int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
 {
     unsigned int i;
     struct vcpu *v = d->vcpu[0];
@@ -3515,296 +2761,6 @@  static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
     return 0;
 }
 
-static int __init alloc_xenstore_evtchn(struct domain *d)
-{
-    evtchn_alloc_unbound_t alloc;
-    int rc;
-
-    alloc.dom = d->domain_id;
-    alloc.remote_dom = hardware_domain->domain_id;
-    rc = evtchn_alloc_unbound(&alloc, 0);
-    if ( rc )
-    {
-        printk("Failed allocating event channel for domain\n");
-        return rc;
-    }
-
-    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
-
-    return 0;
-}
-
-static int __init construct_domU(struct domain *d,
-                                 const struct dt_device_node *node)
-{
-    struct kernel_info kinfo = {};
-    const char *dom0less_enhanced;
-    int rc;
-    u64 mem;
-    u32 p2m_mem_mb;
-    unsigned long p2m_pages;
-
-    rc = dt_property_read_u64(node, "memory", &mem);
-    if ( !rc )
-    {
-        printk("Error building DomU: cannot read \"memory\" property\n");
-        return -EINVAL;
-    }
-    kinfo.unassigned_mem = (paddr_t)mem * SZ_1K;
-
-    rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb);
-    /* If xen,domain-p2m-mem-mb is not specified, use the default value. */
-    p2m_pages = rc ?
-                p2m_mem_mb << (20 - PAGE_SHIFT) :
-                domain_p2m_pages(mem, d->max_vcpus);
-
-    spin_lock(&d->arch.paging.lock);
-    rc = p2m_set_allocation(d, p2m_pages, NULL);
-    spin_unlock(&d->arch.paging.lock);
-    if ( rc != 0 )
-        return rc;
-
-    printk("*** LOADING DOMU cpus=%u memory=%#"PRIx64"KB ***\n",
-           d->max_vcpus, mem);
-
-    kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
-
-    rc = dt_property_read_string(node, "xen,enhanced", &dom0less_enhanced);
-    if ( rc == -EILSEQ ||
-         rc == -ENODATA ||
-         (rc == 0 && !strcmp(dom0less_enhanced, "enabled")) )
-    {
-        if ( hardware_domain )
-            kinfo.dom0less_feature = DOM0LESS_ENHANCED;
-        else
-            panic("At the moment, Xenstore support requires dom0 to be present\n");
-    }
-    else if ( rc == 0 && !strcmp(dom0less_enhanced, "no-xenstore") )
-        kinfo.dom0less_feature = DOM0LESS_ENHANCED_NO_XS;
-
-    if ( vcpu_create(d, 0) == NULL )
-        return -ENOMEM;
-
-    d->max_pages = ((paddr_t)mem * SZ_1K) >> PAGE_SHIFT;
-
-    kinfo.d = d;
-
-    rc = kernel_probe(&kinfo, node);
-    if ( rc < 0 )
-        return rc;
-
-#ifdef CONFIG_ARM_64
-    /* type must be set before allocate memory */
-    d->arch.type = kinfo.type;
-#endif
-    if ( !dt_find_property(node, "xen,static-mem", NULL) )
-        allocate_memory(d, &kinfo);
-    else if ( !is_domain_direct_mapped(d) )
-        allocate_static_memory(d, &kinfo, node);
-    else
-        assign_static_memory_11(d, &kinfo, node);
-
-#ifdef CONFIG_STATIC_SHM
-    rc = process_shm(d, &kinfo, node);
-    if ( rc < 0 )
-        return rc;
-#endif
-
-    /*
-     * Base address and irq number are needed when creating vpl011 device
-     * tree node in prepare_dtb_domU, so initialization on related variables
-     * shall be done first.
-     */
-    if ( kinfo.vpl011 )
-    {
-        rc = domain_vpl011_init(d, NULL);
-        if ( rc < 0 )
-            return rc;
-    }
-
-    rc = prepare_dtb_domU(d, &kinfo);
-    if ( rc < 0 )
-        return rc;
-
-    rc = construct_domain(d, &kinfo);
-    if ( rc < 0 )
-        return rc;
-
-    if ( kinfo.dom0less_feature & DOM0LESS_XENSTORE )
-    {
-        ASSERT(hardware_domain);
-        rc = alloc_xenstore_evtchn(d);
-        if ( rc < 0 )
-            return rc;
-        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
-    }
-
-    return rc;
-}
-
-void __init create_domUs(void)
-{
-    struct dt_device_node *node;
-    const struct dt_device_node *cpupool_node,
-                                *chosen = dt_find_node_by_path("/chosen");
-
-    BUG_ON(chosen == NULL);
-    dt_for_each_child_node(chosen, node)
-    {
-        struct domain *d;
-        struct xen_domctl_createdomain d_cfg = {
-            .arch.gic_version = XEN_DOMCTL_CONFIG_GIC_NATIVE,
-            .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
-            /*
-             * The default of 1023 should be sufficient for guests because
-             * on ARM we don't bind physical interrupts to event channels.
-             * The only use of the evtchn port is inter-domain communications.
-             * 1023 is also the default value used in libxl.
-             */
-            .max_evtchn_port = 1023,
-            .max_grant_frames = -1,
-            .max_maptrack_frames = -1,
-            .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
-        };
-        unsigned int flags = 0U;
-        uint32_t val;
-        int rc;
-
-        if ( !dt_device_is_compatible(node, "xen,domain") )
-            continue;
-
-        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
-            panic("No more domain IDs available\n");
-
-        if ( dt_find_property(node, "xen,static-mem", NULL) )
-            flags |= CDF_staticmem;
-
-        if ( dt_property_read_bool(node, "direct-map") )
-        {
-            if ( !(flags & CDF_staticmem) )
-                panic("direct-map is not valid for domain %s without static allocation.\n",
-                      dt_node_name(node));
-
-            flags |= CDF_directmap;
-        }
-
-        if ( !dt_property_read_u32(node, "cpus", &d_cfg.max_vcpus) )
-            panic("Missing property 'cpus' for domain %s\n",
-                  dt_node_name(node));
-
-        if ( dt_find_compatible_node(node, NULL, "multiboot,device-tree") &&
-             iommu_enabled )
-            d_cfg.flags |= XEN_DOMCTL_CDF_iommu;
-
-        if ( !dt_property_read_u32(node, "nr_spis", &d_cfg.arch.nr_spis) )
-        {
-            int vpl011_virq = GUEST_VPL011_SPI;
-
-            d_cfg.arch.nr_spis = gic_number_lines() - 32;
-
-            /*
-             * The VPL011 virq is GUEST_VPL011_SPI, unless direct-map is
-             * set, in which case it'll match the hardware.
-             *
-             * Since the domain is not yet created, we can't use
-             * d->arch.vpl011.irq. So the logic to find the vIRQ has to
-             * be hardcoded.
-             * The logic here shall be consistent with the one in
-             * domain_vpl011_init().
-             */
-            if ( flags & CDF_directmap )
-            {
-                vpl011_virq = serial_irq(SERHND_DTUART);
-                if ( vpl011_virq < 0 )
-                    panic("Error getting IRQ number for this serial port %d\n",
-                          SERHND_DTUART);
-            }
-
-            /*
-             * vpl011 uses one emulated SPI. If vpl011 is requested, make
-             * sure that we allocate enough SPIs for it.
-             */
-            if ( dt_property_read_bool(node, "vpl011") )
-                d_cfg.arch.nr_spis = MAX(d_cfg.arch.nr_spis,
-                                         vpl011_virq - 32 + 1);
-        }
-
-        /* Get the optional property domain-cpupool */
-        cpupool_node = dt_parse_phandle(node, "domain-cpupool", 0);
-        if ( cpupool_node )
-        {
-            int pool_id = btcpupools_get_domain_pool_id(cpupool_node);
-            if ( pool_id < 0 )
-                panic("Error getting cpupool id from domain-cpupool (%d)\n",
-                      pool_id);
-            d_cfg.cpupool_id = pool_id;
-        }
-
-        if ( dt_property_read_u32(node, "max_grant_version", &val) )
-            d_cfg.grant_opts = XEN_DOMCTL_GRANT_version(val);
-
-        if ( dt_property_read_u32(node, "max_grant_frames", &val) )
-        {
-            if ( val > INT32_MAX )
-                panic("max_grant_frames (%"PRIu32") overflow\n", val);
-            d_cfg.max_grant_frames = val;
-        }
-
-        if ( dt_property_read_u32(node, "max_maptrack_frames", &val) )
-        {
-            if ( val > INT32_MAX )
-                panic("max_maptrack_frames (%"PRIu32") overflow\n", val);
-            d_cfg.max_maptrack_frames = val;
-        }
-
-        if ( dt_get_property(node, "sve", &val) )
-        {
-#ifdef CONFIG_ARM64_SVE
-            unsigned int sve_vl_bits;
-            bool ret = false;
-
-            if ( !val )
-            {
-                /* Property found with no value, means max HW VL supported */
-                ret = sve_domctl_vl_param(-1, &sve_vl_bits);
-            }
-            else
-            {
-                if ( dt_property_read_u32(node, "sve", &val) )
-                    ret = sve_domctl_vl_param(val, &sve_vl_bits);
-                else
-                    panic("Error reading 'sve' property\n");
-            }
-
-            if ( ret )
-                d_cfg.arch.sve_vl = sve_encode_vl(sve_vl_bits);
-            else
-                panic("SVE vector length error\n");
-#else
-            panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n");
-#endif
-        }
-
-        /*
-         * The variable max_init_domid is initialized with zero, so here it's
-         * very important to use the pre-increment operator to call
-         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
-         */
-        d = domain_create(++max_init_domid, &d_cfg, flags);
-        if ( IS_ERR(d) )
-            panic("Error creating domain %s (rc = %ld)\n",
-                  dt_node_name(node), PTR_ERR(d));
-
-        d->is_console = true;
-        dt_device_set_used_by(node, d->domain_id);
-
-        rc = construct_domU(d, node);
-        if ( rc )
-            panic("Could not set up domain %s (rc = %d)\n",
-                  dt_node_name(node), rc);
-    }
-}
-
 static int __init construct_dom0(struct domain *d)
 {
     struct kernel_info kinfo = {};
diff --git a/xen/arch/arm/include/asm/dom0less-build.h b/xen/arch/arm/include/asm/dom0less-build.h
new file mode 100644
index 000000000000..0d28fa1bee46
--- /dev/null
+++ b/xen/arch/arm/include/asm/dom0less-build.h
@@ -0,0 +1,25 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * xen/arch/arm/include/asm/dom0less-build.h
+ *
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef __ARM_DOM0LESS_BUILD_H_
+#define __ARM_DOM0LESS_BUILD_H_
+
+#include <asm/kernel.h>
+
+void create_domUs(void);
+bool is_dom0less_mode(void);
+
+#endif  /* __ARM_DOM0LESS_BUILD_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:b
+ */
diff --git a/xen/arch/arm/include/asm/domain_build.h b/xen/arch/arm/include/asm/domain_build.h
index b9329c9ee032..d95cf376048f 100644
--- a/xen/arch/arm/include/asm/domain_build.h
+++ b/xen/arch/arm/include/asm/domain_build.h
@@ -4,9 +4,45 @@ 
 #include <xen/sched.h>
 #include <asm/kernel.h>
 
+typedef __be32 gic_interrupt_t[3];
+
+int construct_domain(struct domain *d, struct kernel_info *kinfo);
+int domain_fdt_begin_node(void *fdt, const char *name, uint64_t unit);
+int make_resv_memory_node(const struct domain *d, void *fdt, int addrcells,
+                          int sizecells, const struct meminfo *mem);
 int make_chosen_node(const struct kernel_info *kinfo);
+int make_cpus_node(const struct domain *d, void *fdt);
+int make_hypervisor_node(struct domain *d, const struct kernel_info *kinfo,
+                         int addrcells, int sizecells);
+int make_memory_node(const struct domain *d, void *fdt, int addrcells,
+                     int sizecells, struct meminfo *mem);
+int make_psci_node(void *fdt);
+int make_timer_node(const struct kernel_info *kinfo);
 void evtchn_allocate(struct domain *d);
 
+unsigned int get_allocation_size(paddr_t size);
+
+/*
+ * handle_device_interrupts retrieves the interrupts configuration from
+ * a device tree node and maps those interrupts to the target domain.
+ *
+ * Returns:
+ *   < 0 error
+ *   0   success
+ */
+int handle_device_interrupts(struct domain *d, struct dt_device_node *dev,
+                             bool need_mapping);
+
+/*
+ * Helper to write an interrupts with the GIC format
+ * This code is assuming the irq is an PPI.
+ */
+void set_interrupt(gic_interrupt_t interrupt, unsigned int irq,
+                   unsigned int cpumask, unsigned int level);
+
+int process_shm(struct domain *d, struct kernel_info *kinfo,
+                const struct dt_device_node *node);
+
 #ifndef CONFIG_ACPI
 static inline int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
 {
@@ -17,6 +53,28 @@  static inline int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
 #else
 int prepare_acpi(struct domain *d, struct kernel_info *kinfo);
 #endif
+
+#ifdef CONFIG_STATIC_MEMORY
+void allocate_static_memory(struct domain *d, struct kernel_info *kinfo,
+                            const struct dt_device_node *node);
+void assign_static_memory_11(struct domain *d, struct kernel_info *kinfo,
+                             const struct dt_device_node *node);
+#else
+static inline void allocate_static_memory(struct domain *d,
+                                          struct kernel_info *kinfo,
+                                          const struct dt_device_node *node)
+{
+    ASSERT_UNREACHABLE();
+}
+
+static inline void assign_static_memory_11(struct domain *d,
+                                           struct kernel_info *kinfo,
+                                           const struct dt_device_node *node)
+{
+    ASSERT_UNREACHABLE();
+}
+#endif
+
 #endif
 
 /*
diff --git a/xen/arch/arm/include/asm/setup.h b/xen/arch/arm/include/asm/setup.h
index b8866c20f462..cd4c7815790b 100644
--- a/xen/arch/arm/include/asm/setup.h
+++ b/xen/arch/arm/include/asm/setup.h
@@ -135,7 +135,6 @@  void acpi_create_efi_mmap_table(struct domain *d,
 
 int acpi_make_efi_nodes(void *fdt, struct membank tbl_add[]);
 
-void create_domUs(void);
 void create_dom0(void);
 void alloc_static_evtchn(void);
 
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index db748839d383..db0299d540dc 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -34,6 +34,7 @@ 
 #include <xen/warning.h>
 #include <xen/hypercall.h>
 #include <asm/alternative.h>
+#include <asm/dom0less-build.h>
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/setup.h>
@@ -1042,38 +1043,6 @@  static void __init setup_mm(void)
 }
 #endif
 
-static bool __init is_dom0less_mode(void)
-{
-    struct bootmodules *mods = &bootinfo.modules;
-    struct bootmodule *mod;
-    unsigned int i;
-    bool dom0found = false;
-    bool domUfound = false;
-
-    /* Look into the bootmodules */
-    for ( i = 0 ; i < mods->nr_mods ; i++ )
-    {
-        mod = &mods->module[i];
-        /* Find if dom0 and domU kernels are present */
-        if ( mod->kind == BOOTMOD_KERNEL )
-        {
-            if ( mod->domU == false )
-            {
-                dom0found = true;
-                break;
-            }
-            else
-                domUfound = true;
-        }
-    }
-
-    /*
-     * If there is no dom0 kernel but at least one domU, then we are in
-     * dom0less mode
-     */
-    return ( !dom0found && domUfound );
-}
-
 size_t __read_mostly dcache_line_bytes;
 
 /* C entry point for boot CPU */