Message ID | c7331e4c2f481f069571976a708c4aba04d2c0e4.1720799926.git.oleksii.kurochko@gmail.com
---|---
State | Superseded
Series | RISCV device tree mapping
Hi Oleksii,

On 12/07/2024 17:22, Oleksii Kurochko wrote:
> Introduces arch_pmap_{un}map functions and select HAS_PMAP
> for CONFIG_RISCV.
>
> Additionaly it was necessary to introduce functions:
> - mfn_to_xen_entry
> - mfn_to_pte
>
> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
> ---
> Changes in V2:
>  - newly introduced patch
> ---
>  xen/arch/riscv/Kconfig            |  1 +
>  xen/arch/riscv/include/asm/page.h |  2 ++
>  xen/arch/riscv/include/asm/pmap.h | 28 ++++++++++++++++++++++++++++
>  xen/arch/riscv/mm.c               | 14 ++++++++++++++
>  4 files changed, 45 insertions(+)
>  create mode 100644 xen/arch/riscv/include/asm/pmap.h
>
> [... Kconfig, page.h and pmap.h hunks — see the full patch below ...]
>
> diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
> index d69a174b5d..445319af08 100644
> --- a/xen/arch/riscv/mm.c
> +++ b/xen/arch/riscv/mm.c
> @@ -370,3 +370,17 @@ int map_pages_to_xen(unsigned long virt,
>      BUG_ON("unimplemented");
>      return -1;
>  }
> +
> +static inline pte_t mfn_to_pte(mfn_t mfn)
> +{
> +    unsigned long pte = mfn_x(mfn) << PTE_PPN_SHIFT;
> +    return (pte_t){ .pte = pte};
> +}
> +
> +inline pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
> +{
> +    /* there is no attr field in RISC-V's pte */
> +    (void) attr;

Surely you have a way to indicate whether an entry is readable/writable?

> +
> +    return mfn_to_pte(mfn);
> +}

Cheers,
On 12.07.2024 18:22, Oleksii Kurochko wrote:
> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/pmap.h
> @@ -0,0 +1,28 @@
> +#ifndef __ASM_PMAP_H__
> +#define __ASM_PMAP_H__
> +
> +#include <xen/bug.h>
> +#include <xen/mm.h>
> +
> +#include <asm/fixmap.h>
> +
> +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> +{
> +    pte_t *entry = &xen_fixmap[slot];
> +    pte_t pte;
> +
> +    ASSERT(!pte_is_valid(*entry));
> +
> +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
> +    pte.pte |= PTE_LEAF_DEFAULT;
> +    write_pte(entry, pte);
> +}
> +
> +static inline void arch_pmap_unmap(unsigned int slot)
> +{
> +    pte_t pte = {};
> +
> +    write_pte(&xen_fixmap[slot], pte);
> +}

Why are these not using set_fixmap() / clear_fixmap() respectively?

> --- a/xen/arch/riscv/mm.c
> +++ b/xen/arch/riscv/mm.c
> @@ -370,3 +370,17 @@ int map_pages_to_xen(unsigned long virt,
>      BUG_ON("unimplemented");
>      return -1;
>  }
> +
> +static inline pte_t mfn_to_pte(mfn_t mfn)

This name suggests (to me) that you're getting _the_ (single) PTE for a
given MFN. However, what the function is doing is to make a PTE using the
given MFN. On x86 at least the common way to name such a function would
be pte_from_mfn().

> +{
> +    unsigned long pte = mfn_x(mfn) << PTE_PPN_SHIFT;
> +    return (pte_t){ .pte = pte};

Nit: Blank missing.

Jan
On 21.07.2024 10:51, Julien Grall wrote:
> On 12/07/2024 17:22, Oleksii Kurochko wrote:
>> +inline pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
>> +{
>> +    /* there is no attr field in RISC-V's pte */
>> +    (void) attr;
>
> Surely you have a way to say indicate whether an entry is readable/writable?

I'm puzzled by this question. The sole outlier in Arm code is pmap.h, in
passing PAGE_HYPERVISOR_RW to this function when all others pass memory
types (MT_*). Afaics only the low three bits are then used in the
function, discarding access control bits altogether. R/W access appears
to be implied.

Jan
Hi Julien,

On Sun, 2024-07-21 at 09:51 +0100, Julien Grall wrote:
> Hi Oleksii,
>
> On 12/07/2024 17:22, Oleksii Kurochko wrote:
> > Introduces arch_pmap_{un}map functions and select HAS_PMAP
> > for CONFIG_RISCV.
> >
> > [...]
> >
> > +inline pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
> > +{
> > +    /* there is no attr field in RISC-V's pte */
> > +    (void) attr;
>
> Surely you have a way to say indicate whether an entry is
> readable/writable?
Sure, there is a way. But probably I misinterpreted attr for Arm and
decided not to use it here. By that I mean that Arm has MT_NORMAL and
MT_DEVICE, which RISC-V doesn't have. If it is about readable/writable
then, for sure, I will start to use attr.

~ Oleksii

> > +
> > +    return mfn_to_pte(mfn);
> > +}
>
> Cheers,
On Mon, 2024-07-22 at 14:54 +0200, Jan Beulich wrote:
> On 12.07.2024 18:22, Oleksii Kurochko wrote:
> > --- /dev/null
> > +++ b/xen/arch/riscv/include/asm/pmap.h
> > @@ -0,0 +1,28 @@
> > [...]
> > +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> > +{
> > +    pte_t *entry = &xen_fixmap[slot];
> > +    pte_t pte;
> > +
> > +    ASSERT(!pte_is_valid(*entry));
> > +
> > +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
> > +    pte.pte |= PTE_LEAF_DEFAULT;
> > +    write_pte(entry, pte);
> > +}
> > +
> > +static inline void arch_pmap_unmap(unsigned int slot)
> > +{
> > +    pte_t pte = {};
> > +
> > +    write_pte(&xen_fixmap[slot], pte);
> > +}
>
> Why are these not using set_fixmap() / clear_fixmap() respectively?
They haven't been introduced yet. And I thought that these functions are
used only in pmap_{un}map(), which is the reason why I decided not to
introduce them. But while writing the answer to another comment, I found
other places where set_fixmap() / clear_fixmap() are used, so I will
introduce them and reuse them here.

>
> > --- a/xen/arch/riscv/mm.c
> > +++ b/xen/arch/riscv/mm.c
> > [...]
> > +static inline pte_t mfn_to_pte(mfn_t mfn)
>
> This name suggests (to me) that you're getting _the_ (single) PTE for
> a given MFN. However, what the function is doing is to make a PTE using
> the given MFN. On x86 at least the common way to name such a function
> would be pte_from_mfn().
If it is a common way then I will rename it. Thanks.

~ Oleksii

> > +{
> > +    unsigned long pte = mfn_x(mfn) << PTE_PPN_SHIFT;
> > +    return (pte_t){ .pte = pte};
>
> Nit: Blank missing.
>
> Jan
Hi,

On 22/07/2024 15:44, Oleksii Kurochko wrote:
> On Mon, 2024-07-22 at 14:54 +0200, Jan Beulich wrote:
>> On 12.07.2024 18:22, Oleksii Kurochko wrote:
>>> --- /dev/null
>>> +++ b/xen/arch/riscv/include/asm/pmap.h
>>> [...]
>>> +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
>>> +{
>>> +    pte_t *entry = &xen_fixmap[slot];
>>> +    pte_t pte;
>>> +
>>> +    ASSERT(!pte_is_valid(*entry));
>>> +
>>> +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
>>> +    pte.pte |= PTE_LEAF_DEFAULT;
>>> +    write_pte(entry, pte);
>>> +}
>>> +
>>> +static inline void arch_pmap_unmap(unsigned int slot)
>>> +{
>>> +    pte_t pte = {};
>>> +
>>> +    write_pte(&xen_fixmap[slot], pte);
>>> +}
>>
>> Why are these not using set_fixmap() / clear_fixmap() respectively?
> They haven't been introduced yet. And I thought that these functions are
> used only in pmap_{un}map() and that is the reason why I decided not to
> introduce them. But while writing the answer to another comment, I
> found other places where set_fixmap() / clear_fixmap() are used, so I
> will introduce them and reuse them here.

I am guessing you are going to implement set_fixmap()/clear_fixmap()
using map_pages_to_xen(). If so, for early boot you are going to end up
in a circular loop because map_pages_to_xen() will likely use pmap()
which will call set_fixmap().

There is a big comment in common/pmap.c which explains why arch_pmap_*
was introduced rather than calling *_fixmap() directly:

    /*
     * We cannot use set_fixmap() here. We use PMAP when the domain map
     * page infrastructure is not yet initialized, so map_pages_to_xen() called
     * by set_fixmap() needs to map pages on demand, which then calls pmap()
     * again, resulting in a loop. Modify the PTEs directly instead. The same
     * is true for pmap_unmap().
     */

Cheers,
On 22/07/2024 13:58, Jan Beulich wrote:
> On 21.07.2024 10:51, Julien Grall wrote:
>> On 12/07/2024 17:22, Oleksii Kurochko wrote:
>>> +inline pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
>>> +{
>>> +    /* there is no attr field in RISC-V's pte */
>>> +    (void) attr;
>>
>> Surely you have a way to say indicate whether an entry is readable/writable?
>
> I'm puzzled by this question. The sole outlier in Arm code is pmap.h, in
> passing PAGE_HYPERVISOR_RW to this function when all others pass memory
> types (MT_*). Afaics only the low three bits are then used in the
> function, discarding access control bits altogether. R/W access appears
> to be implied.

Arm is not exempt from odd interfaces. However, from just this patch, it
is not clear why RISC-V would continue to use the same approach if the
attributes don't exist.

Looking at the rest of the series, it seems to be because we wanted to
have generic page-table code. If there is a desire to continue in this
direction, then I would assume we would need an arch-specific way to set
the read/write bit. At which point it makes a lot more sense to push
setting the access bits into mfn_to_xen_entry().

Even if we don't do any code consolidation, I think it is odd for Arm
that a caller will assume mfn_to_xen_entry() will always return a
read-writable page and update it as necessary. It would be nicer to push
this decision into mfn_to_xen_entry().

Cheers,
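As an illustration of the direction suggested above — folding the access
bits into mfn_to_xen_entry() itself rather than having callers OR them in —
a minimal sketch follows. The PTE_VALID/PTE_READABLE/PTE_WRITABLE macro
names and the exact encoding of the flags argument are assumptions made
only for this example; they are not the actual RISC-V port code.

```c
/* Sketch only: permission handling pushed into mfn_to_xen_entry(). */
static inline pte_t pte_from_mfn(mfn_t mfn)
{
    /* Same construction as the patch's mfn_to_pte(), under the name Jan suggested. */
    return (pte_t){ .pte = mfn_x(mfn) << PTE_PPN_SHIFT };
}

pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int flags)
{
    pte_t pte = pte_from_mfn(mfn);

    pte.pte |= PTE_VALID;           /* assumed name for the V bit */

    if ( flags & PTE_READABLE )     /* assumed R/W flag encoding */
        pte.pte |= PTE_READABLE;
    if ( flags & PTE_WRITABLE )
        pte.pte |= PTE_WRITABLE;

    return pte;
}
```

With something along these lines, a caller such as arch_pmap_map() would
simply pass PAGE_HYPERVISOR_RW and would no longer need to OR in
PTE_LEAF_DEFAULT itself.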
On Mon, 2024-07-22 at 15:48 +0100, Julien Grall wrote:
> Hi,
>
> On 22/07/2024 15:44, Oleksii Kurochko wrote:
> > On Mon, 2024-07-22 at 14:54 +0200, Jan Beulich wrote:
> > > On 12.07.2024 18:22, Oleksii Kurochko wrote:
> > > > [...]
> > > > +static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> > > > +{
> > > > +    pte_t *entry = &xen_fixmap[slot];
> > > > +    pte_t pte;
> > > > +
> > > > +    ASSERT(!pte_is_valid(*entry));
> > > > +
> > > > +    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
> > > > +    pte.pte |= PTE_LEAF_DEFAULT;
> > > > +    write_pte(entry, pte);
> > > > +}
> > > > +
> > > > +static inline void arch_pmap_unmap(unsigned int slot)
> > > > +{
> > > > +    pte_t pte = {};
> > > > +
> > > > +    write_pte(&xen_fixmap[slot], pte);
> > > > +}
> > >
> > > Why are these not using set_fixmap() / clear_fixmap() respectively?
> > They haven't been introduced yet. And I thought that these functions
> > are used only in pmap_{un}map(), which is the reason why I decided
> > not to introduce them. But while writing the answer to another
> > comment, I found other places where set_fixmap() / clear_fixmap()
> > are used, so I will introduce them and reuse them here.
>
> I am guessing you are going to implement set_fixmap()/clear_fixmap()
> using map_pages_to_xen(). If so, for early boot you are going to end up
> in a circular loop because map_pages_to_xen() will likely use pmap()
> which will call set_fixmap().
I am going to implement it in the following way, as I hit the issue you
describe when I first tried to implement it using map_pages_to_xen():

/* Map a 4k page in a fixmap entry */
void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
{
    pte_t pte;

    pte = mfn_to_xen_entry(mfn, flags);
    pte.pte |= PTE_LEAF_DEFAULT;
    write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);
}

/* Remove a mapping from a fixmap entry */
void clear_fixmap(unsigned map)
{
    pte_t pte = {0};
    write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);
}

~ Oleksii

> There is a big comment in common/pmap.c which explains why arch_pmap_*
> was introduced rather than calling *_fixmap() directly:
>
>     /*
>      * We cannot use set_fixmap() here. We use PMAP when the domain map
>      * page infrastructure is not yet initialized, so map_pages_to_xen() called
>      * by set_fixmap() needs to map pages on demand, which then calls pmap()
>      * again, resulting in a loop. Modify the PTEs directly instead. The same
>      * is true for pmap_unmap().
>      */
>
> Cheers,
Hi,

On 22/07/2024 18:09, Oleksii Kurochko wrote:
> On Mon, 2024-07-22 at 15:48 +0100, Julien Grall wrote:
>> [...]
>> I am guessing you are going to implement set_fixmap()/clear_fixmap()
>> using map_pages_to_xen(). If so, for early boot you are going to end up
>> in a circular loop because map_pages_to_xen() will likely use pmap()
>> which will call set_fixmap().
> I am going to implement it in the following way, as I hit the issue you
> describe when I first tried to implement it using map_pages_to_xen():

What's wrong with keeping the arch_pmap_*() as-is and calling
map_pages_to_xen() in the fixmap?

At least this would be consistent with what the other architectures do
and less risky (see below).

> /* Map a 4k page in a fixmap entry */
> void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
> {
>     pte_t pte;
>
>     pte = mfn_to_xen_entry(mfn, flags);
>     pte.pte |= PTE_LEAF_DEFAULT;
>     write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);

It would be saner to check if you are not overwriting any existing
mapping, as otherwise you will probably need a TLB flush.

> }
>
> /* Remove a mapping from a fixmap entry */
> void clear_fixmap(unsigned map)
> {
>     pte_t pte = {0};
>     write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);

Don't you need a TLB flush?

Cheers,
Hi Julien,

On Mon, Jul 22, 2024 at 7:27 PM Julien Grall <julien@xen.org> wrote:
> Hi,
>
> [...]
>
> What's wrong with keeping the arch_pmap_*() as-is and calling
> map_pages_to_xen() in the fixmap?
>
> At least this would be consistent with what the other architectures do
> and less risky (see below).

Then I misunderstood you; if {set/clear}_fixmap() isn't used inside
arch_pmap_*(), then everything should be fine. In that case I think a
comment should also be added above the arch_pmap_*() functions explaining
why {set/clear}_fixmap() isn't used there (or the commit message should be
updated).

> > /* Map a 4k page in a fixmap entry */
> > void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
> > {
> >     pte_t pte;
> >
> >     pte = mfn_to_xen_entry(mfn, flags);
> >     pte.pte |= PTE_LEAF_DEFAULT;
> >     write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);
>
> It would be saner to check if you are not overwriting any existing
> mapping, as otherwise you will probably need a TLB flush.
>
> > }
> >
> > /* Remove a mapping from a fixmap entry */
> > void clear_fixmap(unsigned map)
> > {
> >     pte_t pte = {0};
> >     write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);
>
> Don't you need a TLB flush?

Inside write_pte() there is "sfence.vma". But it would probably be better
to add flush_xen_tlb_range_va_local() or something similar here, in case
someone decides to change write_pte() later.

~ Oleksii
On 23.07.2024 10:02, Oleksii Kurochko wrote:
> On Mon, Jul 22, 2024 at 7:27 PM Julien Grall <julien@xen.org> wrote:
>> On 22/07/2024 15:44, Oleksii Kurochko wrote:
>>> /* Map a 4k page in a fixmap entry */
>>> void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
>>> {
>>>     pte_t pte;
>>>
>>>     pte = mfn_to_xen_entry(mfn, flags);
>>>     pte.pte |= PTE_LEAF_DEFAULT;
>>>     write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);
>>
>> It would be saner to check if you are not overwriting any existing
>> mapping, as otherwise you will probably need a TLB flush.
>>
>>> }
>>>
>>> /* Remove a mapping from a fixmap entry */
>>> void clear_fixmap(unsigned map)
>>> {
>>>     pte_t pte = {0};
>>>     write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);
>>
>> Don't you need a TLB flush?
>>
> Inside write_pte() there is "sfence.vma".

That's just a fence though, not a TLB flush.

Jan
On Tue, 2024-07-23 at 10:36 +0200, Jan Beulich wrote:
> On 23.07.2024 10:02, Oleksii Kurochko wrote:
> > On Mon, Jul 22, 2024 at 7:27 PM Julien Grall <julien@xen.org> wrote:
> > > [...]
> > > Don't you need a TLB flush?
> > >
> > Inside write_pte() there is "sfence.vma".
>
> That's just a fence though, not a TLB flush.
From the privileged doc:
```
SFENCE.VMA is also used to invalidate entries in the address-translation
cache associated with a hart (see Section 4.3.2).
...
The SFENCE.VMA is used to flush any local hardware caches related to
address translation. It is specified as a fence rather than a TLB flush
to provide cleaner semantics with respect to which instructions are
affected by the flush operation and to support a wider variety of dynamic
caching structures and memory-management schemes. SFENCE.VMA is also used
by higher privilege levels to synchronize page table writes and the
address translation hardware.
...
```
I read this as: SFENCE.VMA is used not only for ordering of loads/stores,
but also to flush the TLB (the TLB being one kind of address-translation
cache, IIUIC).

Also, the Linux kernel uses sfence.vma to flush the TLB:
https://elixir.bootlin.com/linux/v6.0/source/arch/riscv/include/asm/tlbflush.h#L23

~ Oleksii
On 23.07.2024 10:55, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-23 at 10:36 +0200, Jan Beulich wrote:
>> [...]
>> That's just a fence though, not a TLB flush.
> From the privileged doc:
> ```
> SFENCE.VMA is also used to invalidate entries in the address-translation
> cache associated with a hart (see Section 4.3.2).
> ...
> The SFENCE.VMA is used to flush any local hardware caches related to
> address translation. It is specified as a fence rather than a TLB flush
> to provide cleaner semantics with respect to which instructions are
> affected by the flush operation and to support a wider variety of dynamic
> caching structures and memory-management schemes. SFENCE.VMA is also used
> by higher privilege levels to synchronize page table writes and the
> address translation hardware.
> ...
> ```
> I read this as: SFENCE.VMA is used not only for ordering of loads/stores,
> but also to flush the TLB (the TLB being one kind of address-translation
> cache, IIUIC).

Oh, I see. Kind of unexpected for an instruction of that name. Yet note
how they talk about the local hart only. You need a wider scope TLB flush
here.

Jan
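To illustrate the distinction Jan is drawing: sfence.vma only affects the
hart that executes it, so reaching other harts' TLBs would have to go
through the SBI RFENCE extension (or an IPI). The sketch below is only an
illustration; the helper names and the sbi_remote_sfence_vma() signature
are assumptions, not existing RISC-V port code.

```c
/* Flush a VA range on the local hart only - what sfence.vma provides. */
static inline void flush_tlb_range_local(unsigned long va, unsigned long size)
{
    unsigned long end = va + size;

    for ( ; va < end; va += PAGE_SIZE )
        asm volatile ( "sfence.vma %0" : : "r" (va) : "memory" );
}

/*
 * Flush the same range on every hart: the local sfence.vma alone is not
 * enough, so a remote fence has to be requested via SBI (hypothetical
 * wrapper, signature assumed) or an IPI to the other harts.
 */
static inline void flush_tlb_range_all(unsigned long va, unsigned long size)
{
    sbi_remote_sfence_vma(NULL /* all harts */, va, size);
}
```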
On Tue, 2024-07-23 at 12:02 +0200, Jan Beulich wrote:
> On 23.07.2024 10:55, oleksii.kurochko@gmail.com wrote:
> > [...]
> > I read this as: SFENCE.VMA is used not only for ordering of
> > loads/stores, but also to flush the TLB (the TLB being one kind of
> > address-translation cache, IIUIC).
>
> Oh, I see. Kind of unexpected for an instruction of that name. Yet note
> how they talk about the local hart only. You need a wider scope TLB
> flush here.
Could you please clarify why it needs to be wider?

Arm Xen flushes only the local TLB. The RISC-V Linux kernel also uses
local_flush_tlb_page() for the fixmap.

~ Oleksii
Hi Oleksii,

On 23/07/2024 16:36, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-23 at 12:02 +0200, Jan Beulich wrote:
>> On 23.07.2024 10:55, oleksii.kurochko@gmail.com wrote:
>>> [...]
>>> I read this as: SFENCE.VMA is used not only for ordering of
>>> loads/stores, but also to flush the TLB (the TLB being one kind of
>>> address-translation cache, IIUIC).

I have to admit, I am a little concerned with calling sfence.vma in
write_pte() (this may only be because I am not very familiar with
RISC-V).

We have cases where multiple entries will be written in a single
map_pages_to_xen() call. So wouldn't this mean that the local TLBs would
be nuked for every write rather than once?

>> Oh, I see. Kind of unexpected for an instruction of that name. Yet
>> note how they talk about the local hart only. You need a wider scope
>> TLB flush here.
> Could you please clarify why it needs to be wider?
>
> Arm Xen flushes only the local TLB.

Which code are you looking at? set_fixmap() will propagate the TLB flush
to all innershareable CPUs.

The PMAP interface will do a local TLB flush because the interface can
only be used during early boot where there is a single CPU running.

> The RISC-V Linux kernel also uses local_flush_tlb_page() for the fixmap.

I don't know how Linux is using set_fixmap(). But what matters is how
Xen is using set_fixmap(). We have a couple of places in Xen where the
fixmap needs to be accessed by all the CPUs.

Given this is a common interface in Xen, I think it makes sense to
follow the same approach to avoid any confusion.

Cheers,
On Tue, 2024-07-23 at 16:49 +0100, Julien Grall wrote:
> Hi Oleksii,
>
> On 23/07/2024 16:36, oleksii.kurochko@gmail.com wrote:
> > [...]
>
> I have to admit, I am a little concerned with calling sfence.vma in
> write_pte() (this may only be because I am not very familiar with
> RISC-V).
>
> We have cases where multiple entries will be written in a single
> map_pages_to_xen() call. So wouldn't this mean that the local TLBs
> would be nuked for every write rather than once?
Yes, it will be nuked, which is bad from a performance point of view.
I just wanted to be sure that I wouldn't forget to issue sfence.vma where
it is necessary, and planned to rework that a little later. But it seems
it would be better not to call sfence.vma in write_pte() from the start.

> > Could you please clarify why it needs to be wider?
> >
> > Arm Xen flushes only the local TLB.
>
> Which code are you looking at? set_fixmap() will propagate the TLB
> flush to all innershareable CPUs.
Yes, here I agree that set_fixmap() uses map_pages_to_xen(), which
somewhere inside uses flush_xen_tlb_range_va() (not
flush_xen_tlb_range_va_local()), so the TLB flush will happen for all
innershareable CPUs.

> The PMAP interface will do a local TLB flush because the interface can
> only be used during early boot where there is a single CPU running.
Yes, I am looking at PMAP:

static inline void arch_pmap_unmap(unsigned int slot)
{
    lpae_t pte = {};

    write_pte(&xen_fixmap[slot], pte);

    flush_xen_tlb_range_va(FIXMAP_ADDR(slot), PAGE_SIZE);
}

IIUC, Jan's comment was originally about the arch_pmap_unmap() case,
which is why I wanted to clarify additionally.

> > The RISC-V Linux kernel also uses local_flush_tlb_page() for the fixmap.
>
> I don't know how Linux is using set_fixmap(). But what matters is how
> Xen is using set_fixmap(). We have a couple of places in Xen where the
> fixmap needs to be accessed by all the CPUs.
>
> Given this is a common interface in Xen, I think it makes sense to
> follow the same approach to avoid any confusion.
Sure. Thanks for clarifying. I will use flush_xen_tlb_range_va() for
set_fixmap().

~ Oleksii
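Pulling together the feedback in this sub-thread (no silent overwrite of a
valid entry, and a flush visible to all CPUs since the fixmap is a common
interface), the fixmap helpers might end up looking roughly like the sketch
below. A RISC-V flush_xen_tlb_range_va() analogous to the Arm helper quoted
above is assumed to exist; this is an illustration, not the final code.

```c
/* Map a 4k page in a fixmap entry. */
void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags)
{
    pte_t *entry = &xen_fixmap[pt_index(0, FIXMAP_ADDR(map))];
    pte_t pte = mfn_to_xen_entry(mfn, flags);

    /* Overwriting a live entry would also require flushing the stale TLB entry. */
    ASSERT(!pte_is_valid(*entry));

    pte.pte |= PTE_LEAF_DEFAULT;
    write_pte(entry, pte);
}

/* Remove a mapping from a fixmap entry. */
void clear_fixmap(unsigned int map)
{
    pte_t pte = {};

    write_pte(&xen_fixmap[pt_index(0, FIXMAP_ADDR(map))], pte);

    /* The fixmap may be used by any CPU, so the flush must reach all of
     * them, not just the local hart. */
    flush_xen_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
}
```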
On Tue, 2024-07-23 at 19:25 +0200, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-23 at 16:49 +0100, Julien Grall wrote:
> > [...]
> > The PMAP interface will do a local TLB flush because the interface
> > can only be used during early boot where there is a single CPU
> > running.
>
> Yes, I am looking at PMAP:
> [...]
> IIUC, Jan's comment was originally about the arch_pmap_unmap() case,
> which is why I wanted to clarify additionally.

Julien,
I have a question related to Arm's version of arch_pmap_map():

static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
{
    lpae_t *entry = &xen_fixmap[slot];
    lpae_t pte;

    ASSERT(!lpae_is_valid(*entry));

    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
    pte.pt.table = 1;
    write_pte(entry, pte);
    /*
     * The new entry will be used very soon after arch_pmap_map() returns.
     * So ensure the DSB in write_pte() has completed before continuing.
     */
    isb();
}

Is the code above the isb() correct? Does it ensure the DSB, not the ISB?
And isn't a TLB flush needed here?

~ Oleksii

> [...]
On 23/07/2024 18:28, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-23 at 19:25 +0200, oleksii.kurochko@gmail.com wrote:
>> [...]
>
> Julien,
> I have a question related to Arm's version of arch_pmap_map():
>
> static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
> {
>     lpae_t *entry = &xen_fixmap[slot];
>     lpae_t pte;
>
>     ASSERT(!lpae_is_valid(*entry));
>
>     pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
>     pte.pt.table = 1;
>     write_pte(entry, pte);
>     /*
>      * The new entry will be used very soon after arch_pmap_map() returns.
>      * So ensure the DSB in write_pte() has completed before continuing.
>      */
>     isb();
> }
>
> Is the code above the isb() correct? Does it ensure the DSB, not the ISB?

I guess you mean the comment. If so, yes, it is correct. write_pte() has a
dsb() just after writing the PTE. This is to guarantee that implicit memory
accesses (instruction fetches or hardware translation table accesses) will
only happen *after* the dsb() has completed.

I am not 100% sure whether the isb() is required for arch_pmap_map(). But I
took the safest approach when implementing it.

> And isn't a TLB flush needed here?

It is not. On Arm, we have a strict policy that every unmap will be
followed by a TLB flush, and you can't modify an existing entry (see the
ASSERT a few lines above). So the TLBs will not contain an entry for the
mapping.

This policy was mainly dictated by the Arm Arm, because when modifying the
output address you need to follow the break-before-make sequence, which
means you have to transition to an invalid mapping and flush the TLBs
before the new entry is added.

For simplicity, we decided to just not bother with trying to implement
break-before-make for the hypervisor page-tables. But we do for the P2M.
Note that newer revisions of the Armv8 spec relaxed the requirement (see
FEAT_BBM).

Cheers,
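For reference, the break-before-make sequence described above amounts to
something like the following when the output address of a live mapping has
to change. This is a simplified sketch; the helper name is illustrative and
the flush helper is assumed to behave like the Arm one quoted earlier.

```c
/* Change a live mapping to point at a different MFN, break-before-make style. */
static void replace_live_mapping(pte_t *entry, pte_t new_pte, unsigned long va)
{
    pte_t invalid = {};

    write_pte(entry, invalid);              /* 1. break: invalidate the entry   */
    flush_xen_tlb_range_va(va, PAGE_SIZE);  /* 2. flush the stale translation   */
    write_pte(entry, new_pte);              /* 3. make: install the new entry   */
}
```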
diff --git a/xen/arch/riscv/Kconfig b/xen/arch/riscv/Kconfig
index 259eea8d3b..0112aa8778 100644
--- a/xen/arch/riscv/Kconfig
+++ b/xen/arch/riscv/Kconfig
@@ -3,6 +3,7 @@ config RISCV
 	select FUNCTION_ALIGNMENT_16B
 	select GENERIC_BUG_FRAME
 	select HAS_DEVICE_TREE
+	select HAS_PMAP
 
 config RISCV_64
 	def_bool y
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index cbbf3656d1..339074d502 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -51,6 +51,8 @@ typedef struct {
 #endif
 } pte_t;
 
+pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr);
+
 static inline pte_t paddr_to_pte(paddr_t paddr,
                                  unsigned int permissions)
 {
diff --git a/xen/arch/riscv/include/asm/pmap.h b/xen/arch/riscv/include/asm/pmap.h
new file mode 100644
index 0000000000..eb4c48515c
--- /dev/null
+++ b/xen/arch/riscv/include/asm/pmap.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_PMAP_H__
+#define __ASM_PMAP_H__
+
+#include <xen/bug.h>
+#include <xen/mm.h>
+
+#include <asm/fixmap.h>
+
+static inline void arch_pmap_map(unsigned int slot, mfn_t mfn)
+{
+    pte_t *entry = &xen_fixmap[slot];
+    pte_t pte;
+
+    ASSERT(!pte_is_valid(*entry));
+
+    pte = mfn_to_xen_entry(mfn, PAGE_HYPERVISOR_RW);
+    pte.pte |= PTE_LEAF_DEFAULT;
+    write_pte(entry, pte);
+}
+
+static inline void arch_pmap_unmap(unsigned int slot)
+{
+    pte_t pte = {};
+
+    write_pte(&xen_fixmap[slot], pte);
+}
+
+#endif /* __ASM_PMAP_H__ */
diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
index d69a174b5d..445319af08 100644
--- a/xen/arch/riscv/mm.c
+++ b/xen/arch/riscv/mm.c
@@ -370,3 +370,17 @@ int map_pages_to_xen(unsigned long virt,
     BUG_ON("unimplemented");
     return -1;
 }
+
+static inline pte_t mfn_to_pte(mfn_t mfn)
+{
+    unsigned long pte = mfn_x(mfn) << PTE_PPN_SHIFT;
+    return (pte_t){ .pte = pte};
+}
+
+inline pte_t mfn_to_xen_entry(mfn_t mfn, unsigned int attr)
+{
+    /* there is no attr field in RISC-V's pte */
+    (void) attr;
+
+    return mfn_to_pte(mfn);
+}
Introduces arch_pmap_{un}map functions and select HAS_PMAP
for CONFIG_RISCV.

Additionally it was necessary to introduce functions:
- mfn_to_xen_entry
- mfn_to_pte

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V2:
 - newly introduced patch
---
 xen/arch/riscv/Kconfig            |  1 +
 xen/arch/riscv/include/asm/page.h |  2 ++
 xen/arch/riscv/include/asm/pmap.h | 28 ++++++++++++++++++++++++++++
 xen/arch/riscv/mm.c               | 14 ++++++++++++++
 4 files changed, 45 insertions(+)
 create mode 100644 xen/arch/riscv/include/asm/pmap.h
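For context, the common PMAP layer referenced in the review
(xen/common/pmap.c) drives the new arch hooks roughly as follows during
early boot. This is a paraphrased sketch rather than the verbatim common
code: the slot bookkeeping helpers and FIX_PMAP_BEGIN-based slot layout
are simplified/assumed for illustration.

```c
/* Rough shape of the common-code callers of arch_pmap_{un}map(). */
void *pmap(mfn_t mfn)
{
    unsigned int idx = find_next_free_slot();   /* hypothetical bookkeeping */
    unsigned int slot = FIX_PMAP_BEGIN + idx;

    /* Single CPU, no preemption: writing the fixmap PTE directly is safe. */
    ASSERT(system_state < SYS_STATE_smp_boot);

    arch_pmap_map(slot, mfn);                   /* fills xen_fixmap[slot] */

    return fix_to_virt(slot);
}

void pmap_unmap(const void *p)
{
    unsigned int slot = virt_to_fix((unsigned long)p);

    arch_pmap_unmap(slot);                      /* clears the PTE again */
    release_slot(slot - FIX_PMAP_BEGIN);        /* hypothetical bookkeeping */
}
```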