diff mbox series

[10/16] xen/arm: add Persistent Map (PMAP) infrastructure

Message ID 20220520120937.28925-11-julien@xen.org (mailing list archive)
State New, archived
Headers show
Series xen/arm: mm: Remove open-coding mappings | expand

Commit Message

Julien Grall May 20, 2022, 12:09 p.m. UTC
From: Wei Liu <wei.liu2@citrix.com>

The basic idea is like Persistent Kernel Map (PKMAP) in Linux. We
pre-populate all the relevant page tables before the system is fully
set up.

We will need it on Arm in order to rework the arm64 version of
xenheap_setup_mappings() as we may need to use pages allocated from
the boot allocator before they are effectively mapped.

This infrastructure is not lock-protected therefore can only be used
before smpboot. After smpboot, map_domain_page() has to be used.

This is based on the x86 version [1] that was originally implemented
by Wei Liu.

The PMAP infrastructure is implemented in common code with some
arch helpers to set/clear the page-table entries and convertion
between a fixmap slot to a virtual address...

As mfn_to_xen_entry() now needs to be exported, take the opportunity
to swich the parameter attr from unsigned to unsigned int.

[1] <e92da4ad6015b6089737fcccba3ec1d6424649a5.1588278317.git.hongyxia@amazon.com>

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
[julien: Adapted for Arm]
Signed-off-by: Julien Grall <jgrall@amazon.com>

---
    Changes in v4:
        - Move xen_fixmap in fixmap.h and add a comment about its usage.
        - Update comments
        - Use DECLARE_BITMAP()
        - Replace local_irq_{enable, disable} with an ASSERT() as there
          should be no user of pmap() in interrupt context.

    Changes in v3:
        - s/BITS_PER_LONG/BITS_PER_BYTE/
        - Move pmap to common code

    Changes in v2:
        - New patch

Cc: Jan Beulich <jbeulich@suse.com>
Cc: Wei Liu <wl@xen.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/arm/Kconfig              |  1 +
 xen/arch/arm/include/asm/fixmap.h | 24 +++++++++++
 xen/arch/arm/include/asm/lpae.h   |  8 ++++
 xen/arch/arm/include/asm/pmap.h   | 32 ++++++++++++++
 xen/arch/arm/mm.c                 |  7 +--
 xen/common/Kconfig                |  3 ++
 xen/common/Makefile               |  1 +
 xen/common/pmap.c                 | 72 +++++++++++++++++++++++++++++++
 xen/include/xen/pmap.h            | 16 +++++++
 9 files changed, 158 insertions(+), 6 deletions(-)
 create mode 100644 xen/arch/arm/include/asm/pmap.h
 create mode 100644 xen/common/pmap.c
 create mode 100644 xen/include/xen/pmap.h

Comments

Julien Grall May 21, 2022, 11:38 a.m. UTC | #1
Hi,

On 20/05/2022 13:09, Julien Grall wrote:
> +void __init pmap_unmap(const void *p)
> +{
> +    unsigned int idx;
> +    unsigned int slot = virt_to_fix((unsigned long)p);
> +
> +    ASSERT(system_state < SYS_STATE_smp_boot);
> +    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
> +    ASSERT(in_irq());

This needs to be ASSERT(!in_irq()).

Cheers,
Wei Chen May 24, 2022, 2:11 a.m. UTC | #2
Hi Julien,

On 2022/5/20 20:09, Julien Grall wrote:
> From: Wei Liu <wei.liu2@citrix.com>
> 
> The basic idea is like Persistent Kernel Map (PKMAP) in Linux. We
> pre-populate all the relevant page tables before the system is fully
> set up.
> 
> We will need it on Arm in order to rework the arm64 version of
> xenheap_setup_mappings() as we may need to use pages allocated from
> the boot allocator before they are effectively mapped.
> 
> This infrastructure is not lock-protected therefore can only be used
> before smpboot. After smpboot, map_domain_page() has to be used.
> 
> This is based on the x86 version [1] that was originally implemented
> by Wei Liu.
> 
> The PMAP infrastructure is implemented in common code with some
> arch helpers to set/clear the page-table entries and convertion
> between a fixmap slot to a virtual address...
> 
> As mfn_to_xen_entry() now needs to be exported, take the opportunity
> to swich the parameter attr from unsigned to unsigned int.
> 
> [1] <e92da4ad6015b6089737fcccba3ec1d6424649a5.1588278317.git.hongyxia@amazon.com>
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> [julien: Adapted for Arm]
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ---
>      Changes in v4:
>          - Move xen_fixmap in fixmap.h and add a comment about its usage.
>          - Update comments
>          - Use DECLARE_BITMAP()
>          - Replace local_irq_{enable, disable} with an ASSERT() as there
>            should be no user of pmap() in interrupt context.
> 
>      Changes in v3:
>          - s/BITS_PER_LONG/BITS_PER_BYTE/
>          - Move pmap to common code
> 
>      Changes in v2:
>          - New patch
> 
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Wei Liu <wl@xen.org>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Roger Pau Monné <roger.pau@citrix.com>
> ---
>   xen/arch/arm/Kconfig              |  1 +
>   xen/arch/arm/include/asm/fixmap.h | 24 +++++++++++
>   xen/arch/arm/include/asm/lpae.h   |  8 ++++
>   xen/arch/arm/include/asm/pmap.h   | 32 ++++++++++++++
>   xen/arch/arm/mm.c                 |  7 +--
>   xen/common/Kconfig                |  3 ++
>   xen/common/Makefile               |  1 +
>   xen/common/pmap.c                 | 72 +++++++++++++++++++++++++++++++
>   xen/include/xen/pmap.h            | 16 +++++++
>   9 files changed, 158 insertions(+), 6 deletions(-)
>   create mode 100644 xen/arch/arm/include/asm/pmap.h
>   create mode 100644 xen/common/pmap.c
>   create mode 100644 xen/include/xen/pmap.h
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index ecfa6822e4d3..a89a67802aa9 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -14,6 +14,7 @@ config ARM
>   	select HAS_DEVICE_TREE
>   	select HAS_PASSTHROUGH
>   	select HAS_PDX
> +	select HAS_PMAP
>   	select IOMMU_FORCE_PT_SHARE
>   
>   config ARCH_DEFCONFIG
> diff --git a/xen/arch/arm/include/asm/fixmap.h b/xen/arch/arm/include/asm/fixmap.h
> index 1cee51e52ab9..365a2385a087 100644
> --- a/xen/arch/arm/include/asm/fixmap.h
> +++ b/xen/arch/arm/include/asm/fixmap.h
> @@ -5,20 +5,44 @@
>   #define __ASM_FIXMAP_H
>   
>   #include <xen/acpi.h>
> +#include <xen/pmap.h>
>   
>   /* Fixmap slots */
>   #define FIXMAP_CONSOLE  0  /* The primary UART */
>   #define FIXMAP_MISC     1  /* Ephemeral mappings of hardware */
>   #define FIXMAP_ACPI_BEGIN  2  /* Start mappings of ACPI tables */
>   #define FIXMAP_ACPI_END    (FIXMAP_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1)  /* End mappings of ACPI tables */
> +#define FIXMAP_PMAP_BEGIN (FIXMAP_ACPI_END + 1) /* Start of PMAP */
> +#define FIXMAP_PMAP_END (FIXMAP_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
> +
> +#define FIXMAP_LAST FIXMAP_PMAP_END
> +
> +#define FIXADDR_START FIXMAP_ADDR(0)
> +#define FIXADDR_TOP FIXMAP_ADDR(FIXMAP_LAST)
>   
>   #ifndef __ASSEMBLY__
>   
> +/*
> + * Direct access to xen_fixmap[] should only happen when {set,
> + * clear}_fixmap() is unusable (e.g. where we would end up to
> + * recursively call the helpers).
> + */
> +extern lpae_t xen_fixmap[XEN_PT_LPAE_ENTRIES];
> +
>   /* Map a page in a fixmap entry */
>   extern void set_fixmap(unsigned map, mfn_t mfn, unsigned attributes);
>   /* Remove a mapping from a fixmap entry */
>   extern void clear_fixmap(unsigned map);
>   
> +#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
> +
> +static inline unsigned int virt_to_fix(vaddr_t vaddr)
> +{
> +    BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
> +
> +    return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
> +}
> +
>   #endif /* __ASSEMBLY__ */
>   
>   #endif /* __ASM_FIXMAP_H */
> diff --git a/xen/arch/arm/include/asm/lpae.h b/xen/arch/arm/include/asm/lpae.h
> index aecb320dec45..fc19cbd84772 100644
> --- a/xen/arch/arm/include/asm/lpae.h
> +++ b/xen/arch/arm/include/asm/lpae.h
> @@ -4,6 +4,7 @@
>   #ifndef __ASSEMBLY__
>   
>   #include <xen/page-defs.h>
> +#include <xen/mm-frame.h>
>   
>   /*
>    * WARNING!  Unlike the x86 pagetable code, where l1 is the lowest level and
> @@ -168,6 +169,13 @@ static inline bool lpae_is_superpage(lpae_t pte, unsigned int level)
>           third_table_offset(addr)            \
>       }
>   
> +/*
> + * Standard entry type that we'll use to build Xen's own pagetables.
> + * We put the same permissions at every level, because they're ignored
> + * by the walker in non-leaf entries.
> + */
> +lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr);
> +
>   #endif /* __ASSEMBLY__ */
>   
>   /*
> diff --git a/xen/arch/arm/include/asm/pmap.h b/xen/arch/arm/include/asm/pmap.h
> new file mode 100644
> index 000000000000..74398b4c4fe6
> --- /dev/null
> +++ b/xen/arch/arm/include/asm/pmap.h
> @@ -0,0 +1,32 @@
> +#ifndef __ASM_PMAP_H__
> +#define __ASM_PMAP_H__
> +
> +#include <xen/mm.h>
> +
> +#include <asm/fixmap.h>
> +
> +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> +{
> +    lpae_t *entry = &xen_fixmap[slot];
> +    lpae_t pte;
> +
> +    ASSERT(!lpae_is_valid(*entry));
> +

Sometimes it is very difficult for me to determine whether to
use ASSERT or fixed check in this situation. In debug=n config,
is there any risk of pte override of arch_pmap_map should be
prevented? IMO, it's better to provide a return value for this
function and use a fixed check here.

> +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
> +    pte.pt.table = 1;
> +    write_pte(entry, pte);
> +}
> +
> +static inline void arch_pmap_unmap(unsigned int slot)
> +{
> +    lpae_t pte = {};
> +

We have checked lpae_is_valid() in arch_pmap_map. So can we add a
!lpae_is_valid check here and return directly?

> +    write_pte(&xen_fixmap[slot], pte);
> +
> +    flush_xen_tlb_range_va_local(FIXMAP_ADDR(slot), PAGE_SIZE);
> +}
> +
> +void arch_pmap_map_slot(unsigned int slot, mfn_t mfn);
> +void arch_pmap_clear_slot(void *ptr);
> +
> +#endif /* __ASM_PMAP_H__ */
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 52b2a0394047..bd1348a99716 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -305,12 +305,7 @@ void dump_hyp_walk(vaddr_t addr)
>       dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
>   }
>   
> -/*
> - * Standard entry type that we'll use to build Xen's own pagetables.
> - * We put the same permissions at every level, because they're ignored
> - * by the walker in non-leaf entries.
> - */
> -static inline lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned attr)
> +lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
>   {
>       lpae_t e = (lpae_t) {
>           .pt = {
> diff --git a/xen/common/Kconfig b/xen/common/Kconfig
> index d921c74d615e..5b6b2406c028 100644
> --- a/xen/common/Kconfig
> +++ b/xen/common/Kconfig
> @@ -49,6 +49,9 @@ config HAS_KEXEC
>   config HAS_PDX
>   	bool
>   
> +config HAS_PMAP
> +	bool
> +
>   config HAS_SCHED_GRANULARITY
>   	bool
>   
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index b1e076c30b81..3baf83d527d8 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -29,6 +29,7 @@ obj-y += notifier.o
>   obj-y += page_alloc.o
>   obj-$(CONFIG_HAS_PDX) += pdx.o
>   obj-$(CONFIG_PERF_COUNTERS) += perfc.o
> +obj-bin-$(CONFIG_HAS_PMAP) += pmap.init.o
>   obj-y += preempt.o
>   obj-y += random.o
>   obj-y += rangeset.o
> diff --git a/xen/common/pmap.c b/xen/common/pmap.c
> new file mode 100644
> index 000000000000..9355cacb7373
> --- /dev/null
> +++ b/xen/common/pmap.c
> @@ -0,0 +1,72 @@
> +#include <xen/bitops.h>
> +#include <xen/init.h>
> +#include <xen/irq.h>
> +#include <xen/pmap.h>
> +
> +#include <asm/pmap.h>
> +#include <asm/fixmap.h>
> +
> +/*
> + * Simple mapping infrastructure to map / unmap pages in fixed map.
> + * This is used to set the page table before the map domain page infrastructure
> + * is initialized.
> + *
> + * This structure is not protected by any locks, so it must not be used after
> + * smp bring-up.
> + */
> +
> +/* Bitmap to track which slot is used */
> +static __initdata DECLARE_BITMAP(inuse, NUM_FIX_PMAP);
> +
> +void *__init pmap_map(mfn_t mfn)
> +{
> +    unsigned int idx;
> +    unsigned int slot;
> +
> +    ASSERT(system_state < SYS_STATE_smp_boot);
> +    ASSERT(!in_irq());
> +
> +    idx = find_first_zero_bit(inuse, NUM_FIX_PMAP);
> +    if ( idx == NUM_FIX_PMAP )
> +        panic("Out of PMAP slots\n");
> +
> +    __set_bit(idx, inuse);
> +
> +    slot = idx + FIXMAP_PMAP_BEGIN;
> +    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
> +
> +    /*
> +     * We cannot use set_fixmap() here. We use PMAP when the domain map
> +     * page infrastructure is not yet initialized, so map_pages_to_xen() called
> +     * by set_fixmap() needs to map pages on demand, which then calls pmap()
> +     * again, resulting in a loop. Modify the PTEs directly instead. The same
> +     * is true for pmap_unmap().
> +     */
> +    arch_pmap_map(slot, mfn);
> +
> +    return fix_to_virt(slot);
> +}
> +
> +void __init pmap_unmap(const void *p)
> +{
> +    unsigned int idx;
> +    unsigned int slot = virt_to_fix((unsigned long)p);
> +
> +    ASSERT(system_state < SYS_STATE_smp_boot);
> +    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
> +    ASSERT(in_irq());
> +

Why this condition is in_irq? Is it for TLB operation in arch_pmap_unmap?

Cheers,
Wei Chen

> +    idx = slot - FIXMAP_PMAP_BEGIN;
> +
> +    __clear_bit(idx, inuse);
> +    arch_pmap_unmap(slot);
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/xen/pmap.h b/xen/include/xen/pmap.h
> new file mode 100644
> index 000000000000..93e61b10870e
> --- /dev/null
> +++ b/xen/include/xen/pmap.h
> @@ -0,0 +1,16 @@
> +#ifndef __XEN_PMAP_H__
> +#define __XEN_PMAP_H__
> +
> +/* Large enough for mapping 5 levels of page tables with some headroom */
> +#define NUM_FIX_PMAP 8
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <xen/mm-frame.h>
> +
> +void *pmap_map(mfn_t mfn);
> +void pmap_unmap(const void *p);
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* __XEN_PMAP_H__ */
Julien Grall May 24, 2022, 8:58 a.m. UTC | #3
On 24/05/2022 03:11, Wei Chen wrote:
> Hi Julien,

Hi Wei,

>> diff --git a/xen/arch/arm/include/asm/pmap.h 
>> b/xen/arch/arm/include/asm/pmap.h
>> new file mode 100644
>> index 000000000000..74398b4c4fe6
>> --- /dev/null
>> +++ b/xen/arch/arm/include/asm/pmap.h
>> @@ -0,0 +1,32 @@
>> +#ifndef __ASM_PMAP_H__
>> +#define __ASM_PMAP_H__
>> +
>> +#include <xen/mm.h>
>> +
>> +#include <asm/fixmap.h>
>> +
>> +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
>> +{
>> +    lpae_t *entry = &xen_fixmap[slot];
>> +    lpae_t pte;
>> +
>> +    ASSERT(!lpae_is_valid(*entry));
>> +
> 
> Sometimes it is very difficult for me to determine whether to
> use ASSERT or fixed check in this situation. In debug=n config,
> is there any risk of pte override of arch_pmap_map should be
> prevented? 

There is always a risk :). In this case, this would be a programming 
error if the slot contains a valid entry. Hence why an ASSERT() (They 
tend to be use for programming error).

> IMO, it's better to provide a return value for this
> function and use a fixed check here.
As I wrote above, arch_pmap_map() is not meant to be called in such 
situation. If we return an error, then there are a lot more churn 
necessary (pmap_map() would now need to return NULL...) for something 
that is never meant to happen.

> 
>> +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
>> +    pte.pt.table = 1;
>> +    write_pte(entry, pte);
>> +}
>> +
>> +static inline void arch_pmap_unmap(unsigned int slot)
>> +{
>> +    lpae_t pte = {};
>> +
> 
> We have checked lpae_is_valid() in arch_pmap_map. So can we add a
> !lpae_is_valid check here and return directly?
The code below can work with invalid entry and this function is not 
meant to be called in such case.

So to me this is sounds like an unnecessary optimization.

>> +void __init pmap_unmap(const void *p)
>> +{
>> +    unsigned int idx;
>> +    unsigned int slot = virt_to_fix((unsigned long)p);
>> +
>> +    ASSERT(system_state < SYS_STATE_smp_boot);
>> +    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
>> +    ASSERT(in_irq());
>> +
> 
> Why this condition is in_irq?

This should be !in_irq().

> Is it for TLB operation in arch_pmap_unmap?

No. pmap_{map, unmap} are not re-entreant. So we have two choices here:
  1) Forbid the helpers to be used in IRQ context
  2) Use local_irq_{disable, enable}

I originally used the caller but given that are no users in IRQ 
contexts, I went with 1.

Cheers,
Luca Fancellu May 26, 2022, 3:55 p.m. UTC | #4
> On 20 May 2022, at 13:09, Julien Grall <julien@xen.org> wrote:
> 
> From: Wei Liu <wei.liu2@citrix.com>
> 
> The basic idea is like Persistent Kernel Map (PKMAP) in Linux. We
> pre-populate all the relevant page tables before the system is fully
> set up.
> 
> We will need it on Arm in order to rework the arm64 version of
> xenheap_setup_mappings() as we may need to use pages allocated from
> the boot allocator before they are effectively mapped.
> 
> This infrastructure is not lock-protected therefore can only be used
> before smpboot. After smpboot, map_domain_page() has to be used.
> 
> This is based on the x86 version [1] that was originally implemented
> by Wei Liu.
> 
> The PMAP infrastructure is implemented in common code with some
> arch helpers to set/clear the page-table entries and convertion
> between a fixmap slot to a virtual address...
> 
> As mfn_to_xen_entry() now needs to be exported, take the opportunity
> to swich the parameter attr from unsigned to unsigned int.
> 
> [1] <e92da4ad6015b6089737fcccba3ec1d6424649a5.1588278317.git.hongyxia@amazon.com>
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> [julien: Adapted for Arm]
> Signed-off-by: Julien Grall <jgrall@amazon.com>

Hi Julien,

with ASSERT(!in_irq()) in pmap_unmap(const void *p) as you previously say.

Reviewed-by: Luca Fancellu <luca.fancellu@arm.com>

I’ve also tested patches up to this one, start/destroying/connecting-to few guests
and no problem.

Tested-by: Luca Fancellu <luca.fancellu@arm.com>
Stefano Stabellini June 8, 2022, 1:08 a.m. UTC | #5
On Fri, 20 May 2022, Julien Grall wrote:
> From: Wei Liu <wei.liu2@citrix.com>
> 
> The basic idea is like Persistent Kernel Map (PKMAP) in Linux. We
> pre-populate all the relevant page tables before the system is fully
> set up.
> 
> We will need it on Arm in order to rework the arm64 version of
> xenheap_setup_mappings() as we may need to use pages allocated from
> the boot allocator before they are effectively mapped.
> 
> This infrastructure is not lock-protected therefore can only be used
> before smpboot. After smpboot, map_domain_page() has to be used.
> 
> This is based on the x86 version [1] that was originally implemented
> by Wei Liu.
> 
> The PMAP infrastructure is implemented in common code with some
> arch helpers to set/clear the page-table entries and convertion
> between a fixmap slot to a virtual address...
> 
> As mfn_to_xen_entry() now needs to be exported, take the opportunity
> to swich the parameter attr from unsigned to unsigned int.
> 
> [1] <e92da4ad6015b6089737fcccba3ec1d6424649a5.1588278317.git.hongyxia@amazon.com>
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> [julien: Adapted for Arm]
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ---
>     Changes in v4:
>         - Move xen_fixmap in fixmap.h and add a comment about its usage.
>         - Update comments
>         - Use DECLARE_BITMAP()
>         - Replace local_irq_{enable, disable} with an ASSERT() as there
>           should be no user of pmap() in interrupt context.
> 
>     Changes in v3:
>         - s/BITS_PER_LONG/BITS_PER_BYTE/
>         - Move pmap to common code
> 
>     Changes in v2:
>         - New patch
> 
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Wei Liu <wl@xen.org>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/arm/Kconfig              |  1 +
>  xen/arch/arm/include/asm/fixmap.h | 24 +++++++++++
>  xen/arch/arm/include/asm/lpae.h   |  8 ++++
>  xen/arch/arm/include/asm/pmap.h   | 32 ++++++++++++++
>  xen/arch/arm/mm.c                 |  7 +--
>  xen/common/Kconfig                |  3 ++
>  xen/common/Makefile               |  1 +
>  xen/common/pmap.c                 | 72 +++++++++++++++++++++++++++++++
>  xen/include/xen/pmap.h            | 16 +++++++
>  9 files changed, 158 insertions(+), 6 deletions(-)
>  create mode 100644 xen/arch/arm/include/asm/pmap.h
>  create mode 100644 xen/common/pmap.c
>  create mode 100644 xen/include/xen/pmap.h
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index ecfa6822e4d3..a89a67802aa9 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -14,6 +14,7 @@ config ARM
>  	select HAS_DEVICE_TREE
>  	select HAS_PASSTHROUGH
>  	select HAS_PDX
> +	select HAS_PMAP
>  	select IOMMU_FORCE_PT_SHARE
>  
>  config ARCH_DEFCONFIG
> diff --git a/xen/arch/arm/include/asm/fixmap.h b/xen/arch/arm/include/asm/fixmap.h
> index 1cee51e52ab9..365a2385a087 100644
> --- a/xen/arch/arm/include/asm/fixmap.h
> +++ b/xen/arch/arm/include/asm/fixmap.h
> @@ -5,20 +5,44 @@
>  #define __ASM_FIXMAP_H
>  
>  #include <xen/acpi.h>
> +#include <xen/pmap.h>
>  
>  /* Fixmap slots */
>  #define FIXMAP_CONSOLE  0  /* The primary UART */
>  #define FIXMAP_MISC     1  /* Ephemeral mappings of hardware */
>  #define FIXMAP_ACPI_BEGIN  2  /* Start mappings of ACPI tables */
>  #define FIXMAP_ACPI_END    (FIXMAP_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1)  /* End mappings of ACPI tables */
> +#define FIXMAP_PMAP_BEGIN (FIXMAP_ACPI_END + 1) /* Start of PMAP */
> +#define FIXMAP_PMAP_END (FIXMAP_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
> +
> +#define FIXMAP_LAST FIXMAP_PMAP_END
> +
> +#define FIXADDR_START FIXMAP_ADDR(0)
> +#define FIXADDR_TOP FIXMAP_ADDR(FIXMAP_LAST)
>  
>  #ifndef __ASSEMBLY__
>  
> +/*
> + * Direct access to xen_fixmap[] should only happen when {set,
> + * clear}_fixmap() is unusable (e.g. where we would end up to
> + * recursively call the helpers).
> + */
> +extern lpae_t xen_fixmap[XEN_PT_LPAE_ENTRIES];
> +
>  /* Map a page in a fixmap entry */
>  extern void set_fixmap(unsigned map, mfn_t mfn, unsigned attributes);
>  /* Remove a mapping from a fixmap entry */
>  extern void clear_fixmap(unsigned map);
>  
> +#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
> +
> +static inline unsigned int virt_to_fix(vaddr_t vaddr)
> +{
> +    BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
> +
> +    return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
> +}
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* __ASM_FIXMAP_H */
> diff --git a/xen/arch/arm/include/asm/lpae.h b/xen/arch/arm/include/asm/lpae.h
> index aecb320dec45..fc19cbd84772 100644
> --- a/xen/arch/arm/include/asm/lpae.h
> +++ b/xen/arch/arm/include/asm/lpae.h
> @@ -4,6 +4,7 @@
>  #ifndef __ASSEMBLY__
>  
>  #include <xen/page-defs.h>
> +#include <xen/mm-frame.h>
>  
>  /*
>   * WARNING!  Unlike the x86 pagetable code, where l1 is the lowest level and
> @@ -168,6 +169,13 @@ static inline bool lpae_is_superpage(lpae_t pte, unsigned int level)
>          third_table_offset(addr)            \
>      }
>  
> +/*
> + * Standard entry type that we'll use to build Xen's own pagetables.
> + * We put the same permissions at every level, because they're ignored
> + * by the walker in non-leaf entries.
> + */
> +lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  /*
> diff --git a/xen/arch/arm/include/asm/pmap.h b/xen/arch/arm/include/asm/pmap.h
> new file mode 100644
> index 000000000000..74398b4c4fe6
> --- /dev/null
> +++ b/xen/arch/arm/include/asm/pmap.h
> @@ -0,0 +1,32 @@
> +#ifndef __ASM_PMAP_H__
> +#define __ASM_PMAP_H__
> +
> +#include <xen/mm.h>
> +
> +#include <asm/fixmap.h>
> +
> +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> +{
> +    lpae_t *entry = &xen_fixmap[slot];
> +    lpae_t pte;
> +
> +    ASSERT(!lpae_is_valid(*entry));
> +
> +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
> +    pte.pt.table = 1;
> +    write_pte(entry, pte);

Here we don't need a tlb flush because we never go from a valid mapping
to another valid mapping. We also go through arch_pmap_unmap which
clears the mapping and also flushes the tlb. Is that right?


> +}
> +
> +static inline void arch_pmap_unmap(unsigned int slot)
> +{
> +    lpae_t pte = {};
> +
> +    write_pte(&xen_fixmap[slot], pte);
> +
> +    flush_xen_tlb_range_va_local(FIXMAP_ADDR(slot), PAGE_SIZE);
> +}
> +
> +void arch_pmap_map_slot(unsigned int slot, mfn_t mfn);
> +void arch_pmap_clear_slot(void *ptr);

What are these two? They are not defined anywhere?


> +#endif /* __ASM_PMAP_H__ */
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 52b2a0394047..bd1348a99716 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -305,12 +305,7 @@ void dump_hyp_walk(vaddr_t addr)
>      dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
>  }
>  
> -/*
> - * Standard entry type that we'll use to build Xen's own pagetables.
> - * We put the same permissions at every level, because they're ignored
> - * by the walker in non-leaf entries.
> - */
> -static inline lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned attr)
> +lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
>  {
>      lpae_t e = (lpae_t) {
>          .pt = {
> diff --git a/xen/common/Kconfig b/xen/common/Kconfig
> index d921c74d615e..5b6b2406c028 100644
> --- a/xen/common/Kconfig
> +++ b/xen/common/Kconfig
> @@ -49,6 +49,9 @@ config HAS_KEXEC
>  config HAS_PDX
>  	bool
>  
> +config HAS_PMAP
> +	bool
> +
>  config HAS_SCHED_GRANULARITY
>  	bool
>  
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index b1e076c30b81..3baf83d527d8 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -29,6 +29,7 @@ obj-y += notifier.o
>  obj-y += page_alloc.o
>  obj-$(CONFIG_HAS_PDX) += pdx.o
>  obj-$(CONFIG_PERF_COUNTERS) += perfc.o
> +obj-bin-$(CONFIG_HAS_PMAP) += pmap.init.o
>  obj-y += preempt.o
>  obj-y += random.o
>  obj-y += rangeset.o
> diff --git a/xen/common/pmap.c b/xen/common/pmap.c
> new file mode 100644
> index 000000000000..9355cacb7373
> --- /dev/null
> +++ b/xen/common/pmap.c
> @@ -0,0 +1,72 @@
> +#include <xen/bitops.h>
> +#include <xen/init.h>
> +#include <xen/irq.h>
> +#include <xen/pmap.h>
> +
> +#include <asm/pmap.h>
> +#include <asm/fixmap.h>
> +
> +/*
> + * Simple mapping infrastructure to map / unmap pages in fixed map.
> + * This is used to set the page table before the map domain page infrastructure
> + * is initialized.
> + *
> + * This structure is not protected by any locks, so it must not be used after
> + * smp bring-up.
> + */
> +
> +/* Bitmap to track which slot is used */
> +static __initdata DECLARE_BITMAP(inuse, NUM_FIX_PMAP);
> +
> +void *__init pmap_map(mfn_t mfn)
> +{
> +    unsigned int idx;
> +    unsigned int slot;
> +
> +    ASSERT(system_state < SYS_STATE_smp_boot);
> +    ASSERT(!in_irq());
> +
> +    idx = find_first_zero_bit(inuse, NUM_FIX_PMAP);
> +    if ( idx == NUM_FIX_PMAP )
> +        panic("Out of PMAP slots\n");
> +
> +    __set_bit(idx, inuse);
> +
> +    slot = idx + FIXMAP_PMAP_BEGIN;
> +    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
> +
> +    /*
> +     * We cannot use set_fixmap() here. We use PMAP when the domain map
> +     * page infrastructure is not yet initialized, so map_pages_to_xen() called
> +     * by set_fixmap() needs to map pages on demand, which then calls pmap()
> +     * again, resulting in a loop. Modify the PTEs directly instead. The same
> +     * is true for pmap_unmap().
> +     */
> +    arch_pmap_map(slot, mfn);
> +
> +    return fix_to_virt(slot);
> +}
> +
> +void __init pmap_unmap(const void *p)
> +{
> +    unsigned int idx;
> +    unsigned int slot = virt_to_fix((unsigned long)p);
> +
> +    ASSERT(system_state < SYS_STATE_smp_boot);
> +    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
> +    ASSERT(in_irq());
> +
> +    idx = slot - FIXMAP_PMAP_BEGIN;
> +
> +    __clear_bit(idx, inuse);
> +    arch_pmap_unmap(slot);
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/xen/pmap.h b/xen/include/xen/pmap.h
> new file mode 100644
> index 000000000000..93e61b10870e
> --- /dev/null
> +++ b/xen/include/xen/pmap.h
> @@ -0,0 +1,16 @@
> +#ifndef __XEN_PMAP_H__
> +#define __XEN_PMAP_H__
> +
> +/* Large enough for mapping 5 levels of page tables with some headroom */
> +#define NUM_FIX_PMAP 8
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <xen/mm-frame.h>
> +
> +void *pmap_map(mfn_t mfn);
> +void pmap_unmap(const void *p);
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* __XEN_PMAP_H__ */
> -- 
> 2.32.0
>
Julien Grall June 8, 2022, 10:01 a.m. UTC | #6
Hi Stefano,

On 08/06/2022 02:08, Stefano Stabellini wrote:
>> diff --git a/xen/arch/arm/include/asm/pmap.h b/xen/arch/arm/include/asm/pmap.h
>> new file mode 100644
>> index 000000000000..74398b4c4fe6
>> --- /dev/null
>> +++ b/xen/arch/arm/include/asm/pmap.h
>> @@ -0,0 +1,32 @@
>> +#ifndef __ASM_PMAP_H__
>> +#define __ASM_PMAP_H__
>> +
>> +#include <xen/mm.h>
>> +
>> +#include <asm/fixmap.h>
>> +
>> +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
>> +{
>> +    lpae_t *entry = &xen_fixmap[slot];
>> +    lpae_t pte;
>> +
>> +    ASSERT(!lpae_is_valid(*entry));
>> +
>> +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
>> +    pte.pt.table = 1;
>> +    write_pte(entry, pte);
> 
> Here we don't need a tlb flush because we never go from a valid mapping
> to another valid mapping.

A TLB flush would not be sufficient here. You would need to follow the 
break-before-make sequence in order to replace a valid mapping with 
another valid mapping.

> We also go through arch_pmap_unmap which
> clears the mapping and also flushes the tlb. Is that right?

The PMAP code is using a bitmap to know which entry is used. So when 
arch_pmap_map() is called, we also guarantees the entry will be invalid 
(hence the ASSERT(!lpae_is_valid()).

The bit in the bitmap will only be cleared by pmap_unmap() which will 
result to a TLB flush.

>> +}
>> +
>> +static inline void arch_pmap_unmap(unsigned int slot)
>> +{
>> +    lpae_t pte = {};
>> +
>> +    write_pte(&xen_fixmap[slot], pte);
>> +
>> +    flush_xen_tlb_range_va_local(FIXMAP_ADDR(slot), PAGE_SIZE);
>> +}
>> +
>> +void arch_pmap_map_slot(unsigned int slot, mfn_t mfn);
>> +void arch_pmap_clear_slot(void *ptr);
> 
> What are these two? They are not defined anywhere?

It is left-over. I will drop them.

Cheers,
Stefano Stabellini June 9, 2022, 12:25 a.m. UTC | #7
On Wed, 8 Jun 2022, Julien Grall wrote:
> On 08/06/2022 02:08, Stefano Stabellini wrote:
> > > diff --git a/xen/arch/arm/include/asm/pmap.h
> > > b/xen/arch/arm/include/asm/pmap.h
> > > new file mode 100644
> > > index 000000000000..74398b4c4fe6
> > > --- /dev/null
> > > +++ b/xen/arch/arm/include/asm/pmap.h
> > > @@ -0,0 +1,32 @@
> > > +#ifndef __ASM_PMAP_H__
> > > +#define __ASM_PMAP_H__
> > > +
> > > +#include <xen/mm.h>
> > > +
> > > +#include <asm/fixmap.h>
> > > +
> > > +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> > > +{
> > > +    lpae_t *entry = &xen_fixmap[slot];
> > > +    lpae_t pte;
> > > +
> > > +    ASSERT(!lpae_is_valid(*entry));
> > > +
> > > +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
> > > +    pte.pt.table = 1;
> > > +    write_pte(entry, pte);
> > 
> > Here we don't need a tlb flush because we never go from a valid mapping
> > to another valid mapping.
> 
> A TLB flush would not be sufficient here. You would need to follow the
> break-before-make sequence in order to replace a valid mapping with another
> valid mapping.
> 
> > We also go through arch_pmap_unmap which
> > clears the mapping and also flushes the tlb. Is that right?
> 
> The PMAP code is using a bitmap to know which entry is used. So when
> arch_pmap_map() is called, we also guarantees the entry will be invalid (hence
> the ASSERT(!lpae_is_valid()).
> 
> The bit in the bitmap will only be cleared by pmap_unmap() which will result
> to a TLB flush.
> 
> > > +}
> > > +
> > > +static inline void arch_pmap_unmap(unsigned int slot)
> > > +{
> > > +    lpae_t pte = {};
> > > +
> > > +    write_pte(&xen_fixmap[slot], pte);
> > > +
> > > +    flush_xen_tlb_range_va_local(FIXMAP_ADDR(slot), PAGE_SIZE);
> > > +}
> > > +
> > > +void arch_pmap_map_slot(unsigned int slot, mfn_t mfn);
> > > +void arch_pmap_clear_slot(void *ptr);
> > 
> > What are these two? They are not defined anywhere?
> 
> It is left-over. I will drop them.

With those two functions removed, you can add my 

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
diff mbox series

Patch

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index ecfa6822e4d3..a89a67802aa9 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -14,6 +14,7 @@  config ARM
 	select HAS_DEVICE_TREE
 	select HAS_PASSTHROUGH
 	select HAS_PDX
+	select HAS_PMAP
 	select IOMMU_FORCE_PT_SHARE
 
 config ARCH_DEFCONFIG
diff --git a/xen/arch/arm/include/asm/fixmap.h b/xen/arch/arm/include/asm/fixmap.h
index 1cee51e52ab9..365a2385a087 100644
--- a/xen/arch/arm/include/asm/fixmap.h
+++ b/xen/arch/arm/include/asm/fixmap.h
@@ -5,20 +5,44 @@ 
 #define __ASM_FIXMAP_H
 
 #include <xen/acpi.h>
+#include <xen/pmap.h>
 
 /* Fixmap slots */
 #define FIXMAP_CONSOLE  0  /* The primary UART */
 #define FIXMAP_MISC     1  /* Ephemeral mappings of hardware */
 #define FIXMAP_ACPI_BEGIN  2  /* Start mappings of ACPI tables */
 #define FIXMAP_ACPI_END    (FIXMAP_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1)  /* End mappings of ACPI tables */
+#define FIXMAP_PMAP_BEGIN (FIXMAP_ACPI_END + 1) /* Start of PMAP */
+#define FIXMAP_PMAP_END (FIXMAP_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
+
+#define FIXMAP_LAST FIXMAP_PMAP_END
+
+#define FIXADDR_START FIXMAP_ADDR(0)
+#define FIXADDR_TOP FIXMAP_ADDR(FIXMAP_LAST)
 
 #ifndef __ASSEMBLY__
 
+/*
+ * Direct access to xen_fixmap[] should only happen when {set,
+ * clear}_fixmap() is unusable (e.g. where we would end up to
+ * recursively call the helpers).
+ */
+extern lpae_t xen_fixmap[XEN_PT_LPAE_ENTRIES];
+
 /* Map a page in a fixmap entry */
 extern void set_fixmap(unsigned map, mfn_t mfn, unsigned attributes);
 /* Remove a mapping from a fixmap entry */
 extern void clear_fixmap(unsigned map);
 
+#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
+
+static inline unsigned int virt_to_fix(vaddr_t vaddr)
+{
+    BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+
+    return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_FIXMAP_H */
diff --git a/xen/arch/arm/include/asm/lpae.h b/xen/arch/arm/include/asm/lpae.h
index aecb320dec45..fc19cbd84772 100644
--- a/xen/arch/arm/include/asm/lpae.h
+++ b/xen/arch/arm/include/asm/lpae.h
@@ -4,6 +4,7 @@ 
 #ifndef __ASSEMBLY__
 
 #include <xen/page-defs.h>
+#include <xen/mm-frame.h>
 
 /*
  * WARNING!  Unlike the x86 pagetable code, where l1 is the lowest level and
@@ -168,6 +169,13 @@  static inline bool lpae_is_superpage(lpae_t pte, unsigned int level)
         third_table_offset(addr)            \
     }
 
+/*
+ * Standard entry type that we'll use to build Xen's own pagetables.
+ * We put the same permissions at every level, because they're ignored
+ * by the walker in non-leaf entries.
+ */
+lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr);
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/xen/arch/arm/include/asm/pmap.h b/xen/arch/arm/include/asm/pmap.h
new file mode 100644
index 000000000000..74398b4c4fe6
--- /dev/null
+++ b/xen/arch/arm/include/asm/pmap.h
@@ -0,0 +1,32 @@ 
+#ifndef __ASM_PMAP_H__
+#define __ASM_PMAP_H__
+
+#include <xen/mm.h>
+
+#include <asm/fixmap.h>
+
+static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
+{
+    lpae_t *entry = &xen_fixmap[slot];
+    lpae_t pte;
+
+    ASSERT(!lpae_is_valid(*entry));
+
+    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
+    pte.pt.table = 1;
+    write_pte(entry, pte);
+}
+
+static inline void arch_pmap_unmap(unsigned int slot)
+{
+    lpae_t pte = {};
+
+    write_pte(&xen_fixmap[slot], pte);
+
+    flush_xen_tlb_range_va_local(FIXMAP_ADDR(slot), PAGE_SIZE);
+}
+
+void arch_pmap_map_slot(unsigned int slot, mfn_t mfn);
+void arch_pmap_clear_slot(void *ptr);
+
+#endif /* __ASM_PMAP_H__ */
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 52b2a0394047..bd1348a99716 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -305,12 +305,7 @@  void dump_hyp_walk(vaddr_t addr)
     dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
 }
 
-/*
- * Standard entry type that we'll use to build Xen's own pagetables.
- * We put the same permissions at every level, because they're ignored
- * by the walker in non-leaf entries.
- */
-static inline lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned attr)
+lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
 {
     lpae_t e = (lpae_t) {
         .pt = {
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index d921c74d615e..5b6b2406c028 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -49,6 +49,9 @@  config HAS_KEXEC
 config HAS_PDX
 	bool
 
+config HAS_PMAP
+	bool
+
 config HAS_SCHED_GRANULARITY
 	bool
 
diff --git a/xen/common/Makefile b/xen/common/Makefile
index b1e076c30b81..3baf83d527d8 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -29,6 +29,7 @@  obj-y += notifier.o
 obj-y += page_alloc.o
 obj-$(CONFIG_HAS_PDX) += pdx.o
 obj-$(CONFIG_PERF_COUNTERS) += perfc.o
+obj-bin-$(CONFIG_HAS_PMAP) += pmap.init.o
 obj-y += preempt.o
 obj-y += random.o
 obj-y += rangeset.o
diff --git a/xen/common/pmap.c b/xen/common/pmap.c
new file mode 100644
index 000000000000..9355cacb7373
--- /dev/null
+++ b/xen/common/pmap.c
@@ -0,0 +1,72 @@ 
+#include <xen/bitops.h>
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/pmap.h>
+
+#include <asm/pmap.h>
+#include <asm/fixmap.h>
+
+/*
+ * Simple mapping infrastructure to map / unmap pages in fixed map.
+ * This is used to set the page table before the map domain page infrastructure
+ * is initialized.
+ *
+ * This structure is not protected by any locks, so it must not be used after
+ * smp bring-up.
+ */
+
+/* Bitmap to track which slot is used */
+static __initdata DECLARE_BITMAP(inuse, NUM_FIX_PMAP);
+
+void *__init pmap_map(mfn_t mfn)
+{
+    unsigned int idx;
+    unsigned int slot;
+
+    ASSERT(system_state < SYS_STATE_smp_boot);
+    ASSERT(!in_irq());
+
+    idx = find_first_zero_bit(inuse, NUM_FIX_PMAP);
+    if ( idx == NUM_FIX_PMAP )
+        panic("Out of PMAP slots\n");
+
+    __set_bit(idx, inuse);
+
+    slot = idx + FIXMAP_PMAP_BEGIN;
+    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
+
+    /*
+     * We cannot use set_fixmap() here. We use PMAP when the domain map
+     * page infrastructure is not yet initialized, so map_pages_to_xen() called
+     * by set_fixmap() needs to map pages on demand, which then calls pmap()
+     * again, resulting in a loop. Modify the PTEs directly instead. The same
+     * is true for pmap_unmap().
+     */
+    arch_pmap_map(slot, mfn);
+
+    return fix_to_virt(slot);
+}
+
+void __init pmap_unmap(const void *p)
+{
+    unsigned int idx;
+    unsigned int slot = virt_to_fix((unsigned long)p);
+
+    ASSERT(system_state < SYS_STATE_smp_boot);
+    ASSERT(slot >= FIXMAP_PMAP_BEGIN && slot <= FIXMAP_PMAP_END);
+    ASSERT(in_irq());
+
+    idx = slot - FIXMAP_PMAP_BEGIN;
+
+    __clear_bit(idx, inuse);
+    arch_pmap_unmap(slot);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/pmap.h b/xen/include/xen/pmap.h
new file mode 100644
index 000000000000..93e61b10870e
--- /dev/null
+++ b/xen/include/xen/pmap.h
@@ -0,0 +1,16 @@ 
+#ifndef __XEN_PMAP_H__
+#define __XEN_PMAP_H__
+
+/* Large enough for mapping 5 levels of page tables with some headroom */
+#define NUM_FIX_PMAP 8
+
+#ifndef __ASSEMBLY__
+
+#include <xen/mm-frame.h>
+
+void *pmap_map(mfn_t mfn);
+void pmap_unmap(const void *p);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PMAP_H__ */