Message ID | 20240129171811.21382-15-carlo.nonato@minervasys.tech (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Arm cache coloring | expand |
On 29.01.2024 18:18, Carlo Nonato wrote: > @@ -218,9 +230,44 @@ static void xen_pt_enforce_wnx(void) > flush_xen_tlb_local(); > } > > +static void __init create_llc_coloring_mappings(void) > +{ > + lpae_t pte; > + unsigned int i; > + struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN); > + mfn_t mfn = maddr_to_mfn(xen_bootmodule->start); > + > + for_each_xen_colored_mfn( mfn, i ) Nit: Either you consider the construct a pseudo-keyword, then for_each_xen_colored_mfn ( mfn, i ) or you don't, then for_each_xen_colored_mfn(mfn, i) please. > --- a/xen/common/llc-coloring.c > +++ b/xen/common/llc-coloring.c > @@ -29,6 +29,8 @@ static unsigned int __ro_after_init xen_num_colors; > > #define mfn_color_mask (max_nr_colors - 1) > #define mfn_to_color(mfn) (mfn_x(mfn) & mfn_color_mask) > +#define mfn_set_color(mfn, color) (_mfn((mfn_x(mfn) & ~mfn_color_mask) | \ > + (color))) Nit: The wrapped line wants further indenting, such that it becomes immediately clear what parentheses are still open. Alternatively: #define mfn_set_color(mfn, color) \ (_mfn((mfn_x(mfn) & ~mfn_color_mask) | (color))) This is certainly an "interesting" construct: I, for one, wouldn't expect that setting the color actually changes the MFN. > @@ -316,6 +318,27 @@ unsigned int get_max_nr_llc_colors(void) > return max_nr_colors; > } > > +paddr_t __init xen_colored_map_size(void) > +{ > + return ROUNDUP((_end - _start) * max_nr_colors, XEN_PADDR_ALIGN); > +} > + > +mfn_t __init xen_colored_mfn(mfn_t mfn) > +{ > + unsigned int i, color = mfn_to_color(mfn); > + > + for( i = 0; i < xen_num_colors; i++ ) Nit: Missing blank. > + { > + if ( color == xen_colors[i] ) > + return mfn; > + else if ( color < xen_colors[i] ) > + return mfn_set_color(mfn, xen_colors[i]); How do you know that this or ... > + } > + > + /* Jump to next color space (max_nr_colors mfns) and use the first color */ > + return mfn_set_color(mfn_add(mfn, max_nr_colors), xen_colors[0]); ... 
this MFN are actually valid and in (available) RAM? > --- a/xen/include/xen/llc-coloring.h > +++ b/xen/include/xen/llc-coloring.h > @@ -24,6 +24,17 @@ static inline void domain_llc_coloring_free(struct domain *d) {} > static inline void domain_dump_llc_colors(const struct domain *d) {} > #endif > > +/** > + * Iterate over each Xen mfn in the colored space. > + * @mfn: the current mfn. The first non colored mfn must be provided as the > + * starting point. > + * @i: loop index. > + */ > +#define for_each_xen_colored_mfn(mfn, i) \ > + for ( i = 0, mfn = xen_colored_mfn(mfn); \ > + i < (_end - _start) >> PAGE_SHIFT; \ > + i++, mfn = xen_colored_mfn(mfn_add(mfn, 1)) ) While the comment mentions it, I still consider it problematic that - unlike other for_each_* constructs we have - this requires one of the iteration variables to be set up front. Question is why it needs to be that way: Isn't it the MFN underlying _start which you mean to start from? Jan
Hi Jan, On Tue, Feb 13, 2024 at 4:25 PM Jan Beulich <jbeulich@suse.com> wrote: > > On 29.01.2024 18:18, Carlo Nonato wrote: > > @@ -218,9 +230,44 @@ static void xen_pt_enforce_wnx(void) > > flush_xen_tlb_local(); > > } > > > > +static void __init create_llc_coloring_mappings(void) > > +{ > > + lpae_t pte; > > + unsigned int i; > > + struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN); > > + mfn_t mfn = maddr_to_mfn(xen_bootmodule->start); > > + > > + for_each_xen_colored_mfn( mfn, i ) > > Nit: Either you consider the construct a pseudo-keyword, then > > for_each_xen_colored_mfn ( mfn, i ) > > or you don't, then > > for_each_xen_colored_mfn(mfn, i) > > please. > > > --- a/xen/common/llc-coloring.c > > +++ b/xen/common/llc-coloring.c > > @@ -29,6 +29,8 @@ static unsigned int __ro_after_init xen_num_colors; > > > > #define mfn_color_mask (max_nr_colors - 1) > > #define mfn_to_color(mfn) (mfn_x(mfn) & mfn_color_mask) > > +#define mfn_set_color(mfn, color) (_mfn((mfn_x(mfn) & ~mfn_color_mask) | \ > > + (color))) > > Nit: The wrapped line wants further indenting, such that it becomes > immediately clear what parentheses are still open. Alternatively: > > #define mfn_set_color(mfn, color) \ > (_mfn((mfn_x(mfn) & ~mfn_color_mask) | (color))) > > This is certainly an "interesting" construct: I, for one, wouldn't expect > that setting the color actually changes the MFN. Would something like mfn_with_color() be a better name? I need something that expresses clearly that something will be returned. Maybe colored_mfn() is even better? > > @@ -316,6 +318,27 @@ unsigned int get_max_nr_llc_colors(void) > > return max_nr_colors; > > } > > > > +paddr_t __init xen_colored_map_size(void) > > +{ > > + return ROUNDUP((_end - _start) * max_nr_colors, XEN_PADDR_ALIGN); > > +} > > + > > +mfn_t __init xen_colored_mfn(mfn_t mfn) > > +{ > > + unsigned int i, color = mfn_to_color(mfn); > > + > > + for( i = 0; i < xen_num_colors; i++ ) > > Nit: Missing blank. 
> > > + { > > + if ( color == xen_colors[i] ) > > + return mfn; > > + else if ( color < xen_colors[i] ) > > + return mfn_set_color(mfn, xen_colors[i]); > > How do you know that this or ... > > > + } > > + > > + /* Jump to next color space (max_nr_colors mfns) and use the first color */ > > + return mfn_set_color(mfn_add(mfn, max_nr_colors), xen_colors[0]); > > ... this MFN are actually valid and in (available) RAM? Xen must be relocated in a valid and available physically colored space. In arm we do that by searching in RAM for a contiguous region that can contain the colored version of Xen (including "holes" of memory that is skipped due to coloring). So we know that if mfn is in this region then the computed colored MFN is in the same valid region as well. I should ASSERT that somehow. This should be something like virt_to_mfn(_start) < mfn < virt_to_mfn(_end) (abusing a bit of syntax), but the problem is that this function is called also when page tables are still not set so I can't count on virt_to_mfn(). > > --- a/xen/include/xen/llc-coloring.h > > +++ b/xen/include/xen/llc-coloring.h > > @@ -24,6 +24,17 @@ static inline void domain_llc_coloring_free(struct domain *d) {} > > static inline void domain_dump_llc_colors(const struct domain *d) {} > > #endif > > > > +/** > > + * Iterate over each Xen mfn in the colored space. > > + * @mfn: the current mfn. The first non colored mfn must be provided as the > > + * starting point. > > + * @i: loop index. > > + */ > > +#define for_each_xen_colored_mfn(mfn, i) \ > > + for ( i = 0, mfn = xen_colored_mfn(mfn); \ > > + i < (_end - _start) >> PAGE_SHIFT; \ > > + i++, mfn = xen_colored_mfn(mfn_add(mfn, 1)) ) > > While the comment mentions it, I still consider it problematic that > - unlike other for_each_* constructs we have - this requires one of > the iteration variables to be set up front. Question is why it needs > to be that way: Isn't it the MFN underlying _start which you mean to > start from? 
As said above, this is also used when page table setup isn't complete, so I can't easily find the first MFN. Thanks. > Jan
On 13.02.2024 18:29, Carlo Nonato wrote: > On Tue, Feb 13, 2024 at 4:25 PM Jan Beulich <jbeulich@suse.com> wrote: >> On 29.01.2024 18:18, Carlo Nonato wrote: >>> @@ -218,9 +230,44 @@ static void xen_pt_enforce_wnx(void) >>> --- a/xen/common/llc-coloring.c >>> +++ b/xen/common/llc-coloring.c >>> @@ -29,6 +29,8 @@ static unsigned int __ro_after_init xen_num_colors; >>> >>> #define mfn_color_mask (max_nr_colors - 1) >>> #define mfn_to_color(mfn) (mfn_x(mfn) & mfn_color_mask) >>> +#define mfn_set_color(mfn, color) (_mfn((mfn_x(mfn) & ~mfn_color_mask) | \ >>> + (color))) >> >> Nit: The wrapped line wants further indenting, such that it becomes >> immediately clear what parentheses are still open. Alternatively: >> >> #define mfn_set_color(mfn, color) \ >> (_mfn((mfn_x(mfn) & ~mfn_color_mask) | (color))) >> >> This is certainly an "interesting" construct: I, for one, wouldn't expect >> that setting the color actually changes the MFN. > > Would something like mfn_with_color() be a better name? I need something that > expresses clearly that something will be returned. Maybe colored_mfn() is even > better? The latter reads as if it was a predicate, not a transformation. The former or get_mfn_with_color() _may_ be okay. Without the get_ it's still a little predicate-like, while the get_ itself somewhat collides with other uses of that prefix, specifically e.g. get_page{,_type}(). So I'm still not overly happy, yet e.g. mfn_from_mfn_and_color() feels clumsy to me. >>> --- a/xen/include/xen/llc-coloring.h >>> +++ b/xen/include/xen/llc-coloring.h >>> @@ -24,6 +24,17 @@ static inline void domain_llc_coloring_free(struct domain *d) {} >>> static inline void domain_dump_llc_colors(const struct domain *d) {} >>> #endif >>> >>> +/** >>> + * Iterate over each Xen mfn in the colored space. >>> + * @mfn: the current mfn. The first non colored mfn must be provided as the >>> + * starting point. >>> + * @i: loop index. 
>>> + */ >>> +#define for_each_xen_colored_mfn(mfn, i) \ >>> + for ( i = 0, mfn = xen_colored_mfn(mfn); \ >>> + i < (_end - _start) >> PAGE_SHIFT; \ >>> + i++, mfn = xen_colored_mfn(mfn_add(mfn, 1)) ) >> >> While the comment mentions it, I still consider it problematic that >> - unlike other for_each_* constructs we have - this requires one of >> the iteration variables to be set up front. Question is why it needs >> to be that way: Isn't it the MFN underlying _start which you mean to >> start from? > > As said above, this is used also when page tables setup isn't complete > so I can't easily find the first MFN. Did you consider making the initial value a macro parameter then? Jan
Hi Jan, On Wed, Feb 14, 2024 at 8:55 AM Jan Beulich <jbeulich@suse.com> wrote: > > On 13.02.2024 18:29, Carlo Nonato wrote: > > On Tue, Feb 13, 2024 at 4:25 PM Jan Beulich <jbeulich@suse.com> wrote: > >> On 29.01.2024 18:18, Carlo Nonato wrote: > >>> @@ -218,9 +230,44 @@ static void xen_pt_enforce_wnx(void) > >>> --- a/xen/common/llc-coloring.c > >>> +++ b/xen/common/llc-coloring.c > >>> @@ -29,6 +29,8 @@ static unsigned int __ro_after_init xen_num_colors; > >>> > >>> #define mfn_color_mask (max_nr_colors - 1) > >>> #define mfn_to_color(mfn) (mfn_x(mfn) & mfn_color_mask) > >>> +#define mfn_set_color(mfn, color) (_mfn((mfn_x(mfn) & ~mfn_color_mask) | \ > >>> + (color))) > >> > >> Nit: The wrapped line wants further indenting, such that it becomes > >> immediately clear what parentheses are still open. Alternatively: > >> > >> #define mfn_set_color(mfn, color) \ > >> (_mfn((mfn_x(mfn) & ~mfn_color_mask) | (color))) > >> > >> This is certainly an "interesting" construct: I, for one, wouldn't expect > >> that setting the color actually changes the MFN. > > > > Would something like mfn_with_color() be a better name? I need something that > > expresses clearly that something will be returned. Maybe colored_mfn() is even > > better? > > The latter reads as if it was a predicate, not a transformation. The former > or get_mfn_with_color() _may_ be okay. Without the get_ it's still a little > predicate-like, while the get_ itself somewhat collides with other uses of > that prefix, specifically e.g. get_page{,_type}(). So I'm still not overly > happy, yet e.g. mfn_from_mfn_and_color() feels clumsy to me. For the moment get_mfn_with_color() seems the best option. 
> >>> --- a/xen/include/xen/llc-coloring.h > >>> +++ b/xen/include/xen/llc-coloring.h > >>> @@ -24,6 +24,17 @@ static inline void domain_llc_coloring_free(struct domain *d) {} > >>> static inline void domain_dump_llc_colors(const struct domain *d) {} > >>> #endif > >>> > >>> +/** > >>> + * Iterate over each Xen mfn in the colored space. > >>> + * @mfn: the current mfn. The first non colored mfn must be provided as the > >>> + * starting point. > >>> + * @i: loop index. > >>> + */ > >>> +#define for_each_xen_colored_mfn(mfn, i) \ > >>> + for ( i = 0, mfn = xen_colored_mfn(mfn); \ > >>> + i < (_end - _start) >> PAGE_SHIFT; \ > >>> + i++, mfn = xen_colored_mfn(mfn_add(mfn, 1)) ) > >> > >> While the comment mentions it, I still consider it problematic that > >> - unlike other for_each_* constructs we have - this requires one of > >> the iteration variables to be set up front. Question is why it needs > >> to be that way: Isn't it the MFN underlying _start which you mean to > >> start from? > > > > As said above, this is used also when page tables setup isn't complete > > so I can't easily find the first MFN. > > Did you consider making the initial value a macro parameter then? Yes, this is better. Thanks. > Jan
Hi Carlo, On 29/01/2024 17:18, Carlo Nonato wrote: > diff --git a/xen/arch/arm/arm64/mmu/head.S b/xen/arch/arm/arm64/mmu/head.S > index fa40b696dd..7926849ab1 100644 > --- a/xen/arch/arm/arm64/mmu/head.S > +++ b/xen/arch/arm/arm64/mmu/head.S > @@ -427,6 +427,60 @@ fail: PRINT("- Boot failed -\r\n") > b 1b > ENDPROC(fail) > > +/* > + * Copy Xen to new location and switch TTBR > + * x0 ttbr > + * x1 source address > + * x2 destination address > + * x3 length > + * > + * Source and destination must be word aligned, length is rounded up > + * to a 16 byte boundary. > + * > + * MUST BE VERY CAREFUL when saving things to RAM over the copy > + */ > +ENTRY(relocate_xen) > + /* > + * Copy 16 bytes at a time using: > + * x9: counter > + * x10: data > + * x11: data > + * x12: source > + * x13: destination > + */ > + mov x9, x3 > + mov x12, x1 > + mov x13, x2 > + > +1: ldp x10, x11, [x12], #16 > + stp x10, x11, [x13], #16 > + > + subs x9, x9, #16 > + bgt 1b > + > + /* > + * Flush destination from dcache using: > + * x9: counter > + * x10: step > + * x11: vaddr > + * > + * This is to ensure data is visible to the instruction cache > + */ > + dsb sy > + > + mov x9, x3 > + ldr x10, =dcache_line_bytes /* x10 := step */ > + ldr x10, [x10] > + mov x11, x2 > + > +1: dc cvac, x11 > + > + add x11, x11, x10 > + subs x9, x9, x10 > + bgt 1b > + I would add a comment here explaining you are relying on the dsb/isb in switch_ttbr_id(). > + b switch_ttbr_id > + > /* > * Switch TTBR > * > @@ -452,7 +506,8 @@ ENTRY(switch_ttbr_id) > > /* > * 5) Flush I-cache > - * This should not be necessary but it is kept for safety. > + * This should not be necessary in the general case, but it's needed > + * for cache coloring because in that case code is relocated. I think there is missing "because" just after "in that case". 
> */ > ic iallu > isb > diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c > index d2651c9486..07cf8040a2 100644 > --- a/xen/arch/arm/arm64/mmu/mm.c > +++ b/xen/arch/arm/arm64/mmu/mm.c > @@ -1,6 +1,7 @@ > /* SPDX-License-Identifier: GPL-2.0 */ > > #include <xen/init.h> > +#include <xen/llc-coloring.h> > #include <xen/mm.h> > #include <xen/pfn.h> > > @@ -125,27 +126,46 @@ void update_identity_mapping(bool enable) > } > > extern void switch_ttbr_id(uint64_t ttbr); > +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len); > > typedef void (switch_ttbr_fn)(uint64_t ttbr); > +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len); > > void __init switch_ttbr(uint64_t ttbr) > { > - vaddr_t id_addr = virt_to_maddr(switch_ttbr_id); > - switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr; > + vaddr_t vaddr, id_addr; > lpae_t pte; > > + if ( llc_coloring_enabled ) > + vaddr = (vaddr_t)relocate_xen; > + else > + vaddr = (vaddr_t)switch_ttbr_id; > + > + id_addr = virt_to_maddr(vaddr); > + > /* Enable the identity mapping in the boot page tables */ > update_identity_mapping(true); > > /* Enable the identity mapping in the runtime page tables */ > - pte = pte_of_xenaddr((vaddr_t)switch_ttbr_id); > + pte = pte_of_xenaddr(vaddr); > pte.pt.table = 1; > pte.pt.xn = 0; > pte.pt.ro = 1; > write_pte(&xen_third_id[third_table_offset(id_addr)], pte); > > /* Switch TTBR */ > - fn(ttbr); > + if ( llc_coloring_enabled ) > + { > + relocate_xen_fn *fn = (relocate_xen_fn *)id_addr; > + > + fn(ttbr, _start, (void *)BOOT_RELOC_VIRT_START, _end - _start); > + } > + else > + { > + switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr; > + > + fn(ttbr); > + } > > /* > * Disable the identity mapping in the runtime page tables. 
> diff --git a/xen/arch/arm/include/asm/mmu/layout.h b/xen/arch/arm/include/asm/mmu/layout.h > index a3b546465b..7228c9fb82 100644 > --- a/xen/arch/arm/include/asm/mmu/layout.h > +++ b/xen/arch/arm/include/asm/mmu/layout.h > @@ -30,6 +30,7 @@ > * 10M - 12M Fixmap: special-purpose 4K mapping slots > * 12M - 16M Early boot mapping of FDT > * 16M - 18M Livepatch vmap (if compiled in) > + * 16M - 22M Cache-colored Xen text, data, bss (temporary, if compiled in) > * > * 1G - 2G VMAP: ioremap and early_ioremap > * > @@ -74,6 +75,8 @@ > #define BOOT_FDT_VIRT_START (FIXMAP_VIRT_START + FIXMAP_VIRT_SIZE) > #define BOOT_FDT_VIRT_SIZE _AT(vaddr_t, MB(4)) > > +#define BOOT_RELOC_VIRT_START (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE) > + > #ifdef CONFIG_LIVEPATCH > #define LIVEPATCH_VMAP_START (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE) > #define LIVEPATCH_VMAP_SIZE _AT(vaddr_t, MB(2)) > diff --git a/xen/arch/arm/llc-coloring.c b/xen/arch/arm/llc-coloring.c > index eee1e80e2d..bbb39214a8 100644 > --- a/xen/arch/arm/llc-coloring.c > +++ b/xen/arch/arm/llc-coloring.c > @@ -9,6 +9,7 @@ > > #include <asm/processor.h> > #include <asm/sysregs.h> > +#include <asm/setup.h> > > /* Return the LLC way size by probing the hardware */ > unsigned int __init get_llc_way_size(void) > @@ -62,7 +63,65 @@ unsigned int __init get_llc_way_size(void) > return line_size * num_sets; > } > > -void __init arch_llc_coloring_init(void) {} > +/** > + * get_xen_paddr - get physical address to relocate Xen to > + * > + * Xen is relocated to as near to the top of RAM as possible and > + * aligned to a XEN_PADDR_ALIGN boundary. > + */ > +static paddr_t __init get_xen_paddr(uint32_t xen_size) AFAICT, xen_size will be the size of a color. Is it possible that this will be bigger than 4GB? If so, this needs to be converted to a paddr_t. > +{ > + struct meminfo *mi = &bootinfo.mem; AFAICT, you are not modifying meminfo. So this can be const. > + paddr_t min_size; > + paddr_t paddr = 0; > + int i; unsigned int please. 
I understand this is re-imported previous code, but we should avoid re-introducing issues with it. > + > + min_size = (xen_size + (XEN_PADDR_ALIGN-1)) & ~(XEN_PADDR_ALIGN-1); coding style: space before/after -. > + > + /* Find the highest bank with enough space. */ > + for ( i = 0; i < mi->nr_banks; i++ ) > + { > + const struct membank *bank = &mi->bank[i]; > + paddr_t s, e; > + > + if ( bank->size >= min_size ) > + { > + e = consider_modules(bank->start, bank->start + bank->size, > + min_size, XEN_PADDR_ALIGN, 0); > + if ( !e ) > + continue; > + > +#ifdef CONFIG_ARM_32 > + /* Xen must be under 4GB */ > + if ( e > 0x100000000ULL ) > + e = 0x100000000ULL; Can you replace with GB(4)? This makes it easier to confirm the constants are correct. > + if ( e < bank->start ) > + continue; > +#endif > + > + s = e - min_size; > + > + if ( s > paddr ) > + paddr = s; > + } > + } > + > + if ( !paddr ) > + panic("Not enough memory to relocate Xen\n"); > + > + printk("Placing Xen at 0x%"PRIpaddr"-0x%"PRIpaddr"\n", > + paddr, paddr + min_size); > + > + return paddr; > +} > + > +void __init arch_llc_coloring_init(void) > +{ > + struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN); I would add BUG_ON(xen_bootmodule). This would make it easier than trying to figure out a NULL pointer dereference.
> + > + xen_bootmodule->size = xen_colored_map_size(); > + xen_bootmodule->start = get_xen_paddr(xen_bootmodule->size); > +} > > /* > * Local variables: > diff --git a/xen/arch/arm/mmu/setup.c b/xen/arch/arm/mmu/setup.c > index 72725840b6..f3e4f6c304 100644 > --- a/xen/arch/arm/mmu/setup.c > +++ b/xen/arch/arm/mmu/setup.c > @@ -7,6 +7,7 @@ > > #include <xen/init.h> > #include <xen/libfdt/libfdt.h> > +#include <xen/llc-coloring.h> > #include <xen/sizes.h> > #include <xen/vmap.h> > > @@ -15,6 +16,11 @@ > /* Override macros from asm/page.h to make them work with mfn_t */ > #undef mfn_to_virt > #define mfn_to_virt(mfn) __mfn_to_virt(mfn_x(mfn)) > +#undef virt_to_mfn > +#define virt_to_mfn(va) _mfn(__virt_to_mfn(va)) > + > +#define virt_to_reloc_virt(virt) \ > + (((vaddr_t)virt) - XEN_VIRT_START + BOOT_RELOC_VIRT_START) > > /* Main runtime page tables */ > > @@ -69,6 +75,7 @@ static void __init __maybe_unused build_assertions(void) > /* 2MB aligned regions */ > BUILD_BUG_ON(XEN_VIRT_START & ~SECOND_MASK); > BUILD_BUG_ON(FIXMAP_ADDR(0) & ~SECOND_MASK); > + BUILD_BUG_ON(BOOT_RELOC_VIRT_START & ~SECOND_MASK); > /* 1GB aligned regions */ > #ifdef CONFIG_ARM_32 > BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK); > @@ -132,7 +139,12 @@ static void __init __maybe_unused build_assertions(void) > > lpae_t __init pte_of_xenaddr(vaddr_t va) > { > - paddr_t ma = va + phys_offset; > + paddr_t ma; > + > + if ( llc_coloring_enabled ) > + ma = virt_to_maddr(virt_to_reloc_virt(va)); > + else > + ma = va + phys_offset; > > return mfn_to_xen_entry(maddr_to_mfn(ma), MT_NORMAL); > } > @@ -218,9 +230,44 @@ static void xen_pt_enforce_wnx(void) > flush_xen_tlb_local(); > } > > +static void __init create_llc_coloring_mappings(void) > +{ > + lpae_t pte; > + unsigned int i; > + struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN); Please add a BUILD_BUG_ON() just to confirm xen_bootmodule is not NULL. 
> + mfn_t mfn = maddr_to_mfn(xen_bootmodule->start); > + > + for_each_xen_colored_mfn( mfn, i ) > + { > + pte = mfn_to_xen_entry(mfn, MT_NORMAL); > + pte.pt.table = 1; /* level 3 mappings always have this bit set */ > + xen_xenmap[i] = pte; > + } > + > + for ( i = 0; i < XEN_NR_ENTRIES(2); i++ ) > + { > + vaddr_t va = BOOT_RELOC_VIRT_START + (i << XEN_PT_LEVEL_SHIFT(2)); > + > + pte = mfn_to_xen_entry(virt_to_mfn(xen_xenmap + > + i * XEN_PT_LPAE_ENTRIES), > + MT_NORMAL); > + pte.pt.table = 1; > + write_pte(&boot_second[second_table_offset(va)], pte); > + } > +} > + > /* > - * Boot-time pagetable setup. > + * Boot-time pagetable setup with coloring support > * Changes here may need matching changes in head.S > + * > + * The cache coloring support consists of: > + * - Create colored mapping that conforms to Xen color selection in xen_xenmap[] > + * - Link the mapping in boot page tables using BOOT_RELOC_VIRT_START as vaddr > + * - pte_of_xenaddr() takes care of translating addresses to the new space > + * during runtime page tables creation > + * - Relocate xen and update TTBR with the new address in the colored space > + * (see switch_ttbr()) > + * - Protect the new space > */ > void __init setup_pagetables(unsigned long boot_phys_offset) > { > @@ -230,6 +277,9 @@ void __init setup_pagetables(unsigned long boot_phys_offset) > > phys_offset = boot_phys_offset; > > + if ( llc_coloring_enabled ) > + create_llc_coloring_mappings(); > + > arch_setup_page_tables(); > > #ifdef CONFIG_ARM_64 > @@ -257,13 +307,7 @@ void __init setup_pagetables(unsigned long boot_phys_offset) > break; > pte = pte_of_xenaddr(va); > pte.pt.table = 1; /* third level mappings always have this bit set */ > - if ( is_kernel_text(va) || is_kernel_inittext(va) ) > - { > - pte.pt.xn = 0; > - pte.pt.ro = 1; > - } > - if ( is_kernel_rodata(va) ) > - pte.pt.ro = 1; > + pte.pt.xn = 0; /* Permissions will be enforced later. 
Allow execution */ I feel this patch contains a lot of change that are cache coloring specific but somewhat different. I think you could have split in a more piece meal one which would have made the review easier. No need to split this patch now, but if there are more place to change, then please consider to do them in separate patches. > xen_xenmap[i] = pte; > } > > @@ -289,8 +333,43 @@ void __init setup_pagetables(unsigned long boot_phys_offset) > ttbr = (uintptr_t) cpu0_pgtable + phys_offset; > #endif > > + if ( llc_coloring_enabled ) > + ttbr = virt_to_maddr(virt_to_reloc_virt(THIS_CPU_PGTABLE)); You are using THIS_CPU_PGTABLE here, but technically it is only valid after #ifdef CONFIG_ARM_32 per_cpu(xen_pgtable, ...) = ... #endif AFAICT, you haven't moved it. > + > switch_ttbr(ttbr); > > + /* Protect Xen */ > + for ( i = 0; i < XEN_NR_ENTRIES(3); i++ ) > + { > + vaddr_t va = XEN_VIRT_START + (i << PAGE_SHIFT); > + lpae_t *entry = xen_xenmap + i; > + > + if ( !is_kernel(va) ) > + break; > + > + pte = read_atomic(entry); > + > + if ( is_kernel_text(va) || is_kernel_inittext(va) ) > + { > + pte.pt.xn = 0; > + pte.pt.ro = 1; > + } else if ( is_kernel_rodata(va) ) { > + pte.pt.ro = 1; > + pte.pt.xn = 1; > + } else { > + pte.pt.xn = 1; > + pte.pt.ro = 0; > + } > + > + write_pte(entry, pte); > + } > + > + /* > + * We modified live page-tables. Ensure the TBLs are invalidated s/TBLs/TLBs/ > + * before setting enforcing the WnX permissions. 
> + */ > + flush_xen_tlb_local(); > + > xen_pt_enforce_wnx(); > > #ifdef CONFIG_ARM_32 > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c > index 28f4761705..64a449f78d 100644 > --- a/xen/arch/arm/setup.c > +++ b/xen/arch/arm/setup.c > @@ -816,8 +816,6 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, > /* Initialize traps early allow us to get backtrace when an error occurred */ > init_traps(); > > - setup_pagetables(boot_phys_offset); > - > smp_clear_cpu_maps(); > > device_tree_flattened = early_fdt_map(fdt_paddr); > @@ -841,6 +839,9 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, > > llc_coloring_init(); > > + setup_pagetables(boot_phys_offset); Please add a comment on top explaining that setup_pagetables() relies on the cache coloring feature to be initialized. > + device_tree_flattened = early_fdt_map(fdt_paddr); I understand why you are calling early_fdt_map(). But it reads odd that this is called twice. I think you want to explain in a comment why this is called again and why we don't unmap the previous one. Cheers,
diff --git a/xen/arch/arm/alternative.c b/xen/arch/arm/alternative.c index 016e66978b..a6b0794d80 100644 --- a/xen/arch/arm/alternative.c +++ b/xen/arch/arm/alternative.c @@ -9,6 +9,7 @@ #include <xen/init.h> #include <xen/types.h> #include <xen/kernel.h> +#include <xen/llc-coloring.h> #include <xen/mm.h> #include <xen/vmap.h> #include <xen/smp.h> @@ -191,6 +192,27 @@ static int __apply_alternatives_multi_stop(void *xenmap) return 0; } +static void __init *xen_remap_colored(mfn_t xen_mfn, paddr_t xen_size) +{ + unsigned int i; + void *xenmap; + mfn_t *xen_colored_mfns; + + xen_colored_mfns = xmalloc_array(mfn_t, xen_size >> PAGE_SHIFT); + if ( !xen_colored_mfns ) + panic("Can't allocate LLC colored MFNs\n"); + + for_each_xen_colored_mfn( xen_mfn, i ) + { + xen_colored_mfns[i] = xen_mfn; + } + + xenmap = vmap(xen_colored_mfns, xen_size >> PAGE_SHIFT); + xfree(xen_colored_mfns); + + return xenmap; +} + /* * This function should only be called during boot and before CPU0 jump * into the idle_loop. @@ -209,8 +231,12 @@ void __init apply_alternatives_all(void) * The text and inittext section are read-only. So re-map Xen to * be able to patch the code. */ - xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR, - VMAP_DEFAULT); + if ( llc_coloring_enabled ) + xenmap = xen_remap_colored(xen_mfn, xen_size); + else + xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR, + VMAP_DEFAULT); + /* Re-mapping Xen is not expected to fail during boot. */ BUG_ON(!xenmap); diff --git a/xen/arch/arm/arm64/mmu/head.S b/xen/arch/arm/arm64/mmu/head.S index fa40b696dd..7926849ab1 100644 --- a/xen/arch/arm/arm64/mmu/head.S +++ b/xen/arch/arm/arm64/mmu/head.S @@ -427,6 +427,60 @@ fail: PRINT("- Boot failed -\r\n") b 1b ENDPROC(fail) +/* + * Copy Xen to new location and switch TTBR + * x0 ttbr + * x1 source address + * x2 destination address + * x3 length + * + * Source and destination must be word aligned, length is rounded up + * to a 16 byte boundary. 
+ * + * MUST BE VERY CAREFUL when saving things to RAM over the copy + */ +ENTRY(relocate_xen) + /* + * Copy 16 bytes at a time using: + * x9: counter + * x10: data + * x11: data + * x12: source + * x13: destination + */ + mov x9, x3 + mov x12, x1 + mov x13, x2 + +1: ldp x10, x11, [x12], #16 + stp x10, x11, [x13], #16 + + subs x9, x9, #16 + bgt 1b + + /* + * Flush destination from dcache using: + * x9: counter + * x10: step + * x11: vaddr + * + * This is to ensure data is visible to the instruction cache + */ + dsb sy + + mov x9, x3 + ldr x10, =dcache_line_bytes /* x10 := step */ + ldr x10, [x10] + mov x11, x2 + +1: dc cvac, x11 + + add x11, x11, x10 + subs x9, x9, x10 + bgt 1b + + b switch_ttbr_id + /* * Switch TTBR * @@ -452,7 +506,8 @@ ENTRY(switch_ttbr_id) /* * 5) Flush I-cache - * This should not be necessary but it is kept for safety. + * This should not be necessary in the general case, but it's needed + * for cache coloring because in that case code is relocated. */ ic iallu isb diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c index d2651c9486..07cf8040a2 100644 --- a/xen/arch/arm/arm64/mmu/mm.c +++ b/xen/arch/arm/arm64/mmu/mm.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <xen/init.h> +#include <xen/llc-coloring.h> #include <xen/mm.h> #include <xen/pfn.h> @@ -125,27 +126,46 @@ void update_identity_mapping(bool enable) } extern void switch_ttbr_id(uint64_t ttbr); +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len); typedef void (switch_ttbr_fn)(uint64_t ttbr); +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len); void __init switch_ttbr(uint64_t ttbr) { - vaddr_t id_addr = virt_to_maddr(switch_ttbr_id); - switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr; + vaddr_t vaddr, id_addr; lpae_t pte; + if ( llc_coloring_enabled ) + vaddr = (vaddr_t)relocate_xen; + else + vaddr = (vaddr_t)switch_ttbr_id; + + id_addr = virt_to_maddr(vaddr); + /* Enable the identity mapping in the 
boot page tables */ update_identity_mapping(true); /* Enable the identity mapping in the runtime page tables */ - pte = pte_of_xenaddr((vaddr_t)switch_ttbr_id); + pte = pte_of_xenaddr(vaddr); pte.pt.table = 1; pte.pt.xn = 0; pte.pt.ro = 1; write_pte(&xen_third_id[third_table_offset(id_addr)], pte); /* Switch TTBR */ - fn(ttbr); + if ( llc_coloring_enabled ) + { + relocate_xen_fn *fn = (relocate_xen_fn *)id_addr; + + fn(ttbr, _start, (void *)BOOT_RELOC_VIRT_START, _end - _start); + } + else + { + switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr; + + fn(ttbr); + } /* * Disable the identity mapping in the runtime page tables. diff --git a/xen/arch/arm/include/asm/mmu/layout.h b/xen/arch/arm/include/asm/mmu/layout.h index a3b546465b..7228c9fb82 100644 --- a/xen/arch/arm/include/asm/mmu/layout.h +++ b/xen/arch/arm/include/asm/mmu/layout.h @@ -30,6 +30,7 @@ * 10M - 12M Fixmap: special-purpose 4K mapping slots * 12M - 16M Early boot mapping of FDT * 16M - 18M Livepatch vmap (if compiled in) + * 16M - 22M Cache-colored Xen text, data, bss (temporary, if compiled in) * * 1G - 2G VMAP: ioremap and early_ioremap * @@ -74,6 +75,8 @@ #define BOOT_FDT_VIRT_START (FIXMAP_VIRT_START + FIXMAP_VIRT_SIZE) #define BOOT_FDT_VIRT_SIZE _AT(vaddr_t, MB(4)) +#define BOOT_RELOC_VIRT_START (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE) + #ifdef CONFIG_LIVEPATCH #define LIVEPATCH_VMAP_START (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE) #define LIVEPATCH_VMAP_SIZE _AT(vaddr_t, MB(2)) diff --git a/xen/arch/arm/llc-coloring.c b/xen/arch/arm/llc-coloring.c index eee1e80e2d..bbb39214a8 100644 --- a/xen/arch/arm/llc-coloring.c +++ b/xen/arch/arm/llc-coloring.c @@ -9,6 +9,7 @@ #include <asm/processor.h> #include <asm/sysregs.h> +#include <asm/setup.h> /* Return the LLC way size by probing the hardware */ unsigned int __init get_llc_way_size(void) @@ -62,7 +63,65 @@ unsigned int __init get_llc_way_size(void) return line_size * num_sets; } -void __init arch_llc_coloring_init(void) {} +/** + * get_xen_paddr - 
get physical address to relocate Xen to + * + * Xen is relocated to as near to the top of RAM as possible and + * aligned to a XEN_PADDR_ALIGN boundary. + */ +static paddr_t __init get_xen_paddr(uint32_t xen_size) +{ + struct meminfo *mi = &bootinfo.mem; + paddr_t min_size; + paddr_t paddr = 0; + int i; + + min_size = (xen_size + (XEN_PADDR_ALIGN-1)) & ~(XEN_PADDR_ALIGN-1); + + /* Find the highest bank with enough space. */ + for ( i = 0; i < mi->nr_banks; i++ ) + { + const struct membank *bank = &mi->bank[i]; + paddr_t s, e; + + if ( bank->size >= min_size ) + { + e = consider_modules(bank->start, bank->start + bank->size, + min_size, XEN_PADDR_ALIGN, 0); + if ( !e ) + continue; + +#ifdef CONFIG_ARM_32 + /* Xen must be under 4GB */ + if ( e > 0x100000000ULL ) + e = 0x100000000ULL; + if ( e < bank->start ) + continue; +#endif + + s = e - min_size; + + if ( s > paddr ) + paddr = s; + } + } + + if ( !paddr ) + panic("Not enough memory to relocate Xen\n"); + + printk("Placing Xen at 0x%"PRIpaddr"-0x%"PRIpaddr"\n", + paddr, paddr + min_size); + + return paddr; +} + +void __init arch_llc_coloring_init(void) +{ + struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN); + + xen_bootmodule->size = xen_colored_map_size(); + xen_bootmodule->start = get_xen_paddr(xen_bootmodule->size); +} /* * Local variables: diff --git a/xen/arch/arm/mmu/setup.c b/xen/arch/arm/mmu/setup.c index 72725840b6..f3e4f6c304 100644 --- a/xen/arch/arm/mmu/setup.c +++ b/xen/arch/arm/mmu/setup.c @@ -7,6 +7,7 @@ #include <xen/init.h> #include <xen/libfdt/libfdt.h> +#include <xen/llc-coloring.h> #include <xen/sizes.h> #include <xen/vmap.h> @@ -15,6 +16,11 @@ /* Override macros from asm/page.h to make them work with mfn_t */ #undef mfn_to_virt #define mfn_to_virt(mfn) __mfn_to_virt(mfn_x(mfn)) +#undef virt_to_mfn +#define virt_to_mfn(va) _mfn(__virt_to_mfn(va)) + +#define virt_to_reloc_virt(virt) \ + (((vaddr_t)virt) - XEN_VIRT_START + BOOT_RELOC_VIRT_START) /* Main runtime page tables 
*/ @@ -69,6 +75,7 @@ static void __init __maybe_unused build_assertions(void) /* 2MB aligned regions */ BUILD_BUG_ON(XEN_VIRT_START & ~SECOND_MASK); BUILD_BUG_ON(FIXMAP_ADDR(0) & ~SECOND_MASK); + BUILD_BUG_ON(BOOT_RELOC_VIRT_START & ~SECOND_MASK); /* 1GB aligned regions */ #ifdef CONFIG_ARM_32 BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK); @@ -132,7 +139,12 @@ static void __init __maybe_unused build_assertions(void) lpae_t __init pte_of_xenaddr(vaddr_t va) { - paddr_t ma = va + phys_offset; + paddr_t ma; + + if ( llc_coloring_enabled ) + ma = virt_to_maddr(virt_to_reloc_virt(va)); + else + ma = va + phys_offset; return mfn_to_xen_entry(maddr_to_mfn(ma), MT_NORMAL); } @@ -218,9 +230,44 @@ static void xen_pt_enforce_wnx(void) flush_xen_tlb_local(); } +static void __init create_llc_coloring_mappings(void) +{ + lpae_t pte; + unsigned int i; + struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN); + mfn_t mfn = maddr_to_mfn(xen_bootmodule->start); + + for_each_xen_colored_mfn ( mfn, i ) + { + pte = mfn_to_xen_entry(mfn, MT_NORMAL); + pte.pt.table = 1; /* level 3 mappings always have this bit set */ + xen_xenmap[i] = pte; + } + + for ( i = 0; i < XEN_NR_ENTRIES(2); i++ ) + { + vaddr_t va = BOOT_RELOC_VIRT_START + (i << XEN_PT_LEVEL_SHIFT(2)); + + pte = mfn_to_xen_entry(virt_to_mfn(xen_xenmap + + i * XEN_PT_LPAE_ENTRIES), + MT_NORMAL); + pte.pt.table = 1; + write_pte(&boot_second[second_table_offset(va)], pte); + } +} + /* - * Boot-time pagetable setup.
+ * Boot-time pagetable setup with coloring support * Changes here may need matching changes in head.S + * + * The cache coloring support consists of: + * - Create colored mapping that conforms to Xen color selection in xen_xenmap[] + * - Link the mapping in boot page tables using BOOT_RELOC_VIRT_START as vaddr + * - pte_of_xenaddr() takes care of translating addresses to the new space + * during runtime page tables creation + * - Relocate Xen and update TTBR with the new address in the colored space + * (see switch_ttbr()) + * - Protect the new space */ void __init setup_pagetables(unsigned long boot_phys_offset) { @@ -230,6 +277,9 @@ void __init setup_pagetables(unsigned long boot_phys_offset) phys_offset = boot_phys_offset; + if ( llc_coloring_enabled ) + create_llc_coloring_mappings(); + arch_setup_page_tables(); #ifdef CONFIG_ARM_64 @@ -257,13 +307,7 @@ void __init setup_pagetables(unsigned long boot_phys_offset) break; pte = pte_of_xenaddr(va); pte.pt.table = 1; /* third level mappings always have this bit set */ - if ( is_kernel_text(va) || is_kernel_inittext(va) ) - { - pte.pt.xn = 0; - pte.pt.ro = 1; - } - if ( is_kernel_rodata(va) ) - pte.pt.ro = 1; + pte.pt.xn = 0; /* Permissions will be enforced later.
Allow execution */ xen_xenmap[i] = pte; } @@ -289,8 +333,47 @@ void __init setup_pagetables(unsigned long boot_phys_offset) ttbr = (uintptr_t) cpu0_pgtable + phys_offset; #endif + if ( llc_coloring_enabled ) + ttbr = virt_to_maddr(virt_to_reloc_virt(THIS_CPU_PGTABLE)); + switch_ttbr(ttbr); + /* Protect Xen */ + for ( i = 0; i < XEN_NR_ENTRIES(3); i++ ) + { + vaddr_t va = XEN_VIRT_START + (i << PAGE_SHIFT); + lpae_t *entry = xen_xenmap + i; + + if ( !is_kernel(va) ) + break; + + pte = read_atomic(entry); + + if ( is_kernel_text(va) || is_kernel_inittext(va) ) + { + pte.pt.xn = 0; + pte.pt.ro = 1; + } + else if ( is_kernel_rodata(va) ) + { + pte.pt.ro = 1; + pte.pt.xn = 1; + } + else + { + pte.pt.xn = 1; + pte.pt.ro = 0; + } + + write_pte(entry, pte); + } + + /* + * We modified live page-tables. Ensure the TLBs are invalidated + * before enforcing the WnX permissions. + */ + flush_xen_tlb_local(); + xen_pt_enforce_wnx(); #ifdef CONFIG_ARM_32 diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c index 28f4761705..64a449f78d 100644 --- a/xen/arch/arm/setup.c +++ b/xen/arch/arm/setup.c @@ -816,8 +816,6 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, /* Initialize traps early allow us to get backtrace when an error occurred */ init_traps(); - setup_pagetables(boot_phys_offset); - smp_clear_cpu_maps(); device_tree_flattened = early_fdt_map(fdt_paddr); @@ -841,6 +839,9 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, llc_coloring_init(); + setup_pagetables(boot_phys_offset); + device_tree_flattened = early_fdt_map(fdt_paddr); + setup_mm(); /* Parse the ACPI tables for possible boot-time configuration */ diff --git a/xen/common/llc-coloring.c b/xen/common/llc-coloring.c index dace881b55..c0c4ce47bf 100644 --- a/xen/common/llc-coloring.c +++ b/xen/common/llc-coloring.c @@ -29,6 +29,8 @@ static unsigned int __ro_after_init xen_num_colors; #define mfn_color_mask (max_nr_colors - 1) #define mfn_to_color(mfn) (mfn_x(mfn) &
mfn_color_mask) +#define mfn_set_color(mfn, color) \ + (_mfn((mfn_x(mfn) & ~mfn_color_mask) | (color))) /* * Parse the coloring configuration given in the buf string, following the @@ -316,6 +318,27 @@ unsigned int get_max_nr_llc_colors(void) return max_nr_colors; } +paddr_t __init xen_colored_map_size(void) +{ + return ROUNDUP((_end - _start) * max_nr_colors, XEN_PADDR_ALIGN); +} + +mfn_t __init xen_colored_mfn(mfn_t mfn) +{ + unsigned int i, color = mfn_to_color(mfn); + + for ( i = 0; i < xen_num_colors; i++ ) + { + if ( color == xen_colors[i] ) + return mfn; + else if ( color < xen_colors[i] ) + return mfn_set_color(mfn, xen_colors[i]); + } + + /* Jump to next color space (max_nr_colors mfns) and use the first color */ + return mfn_set_color(mfn_add(mfn, max_nr_colors), xen_colors[0]); +} + /* * Local variables: * mode: C diff --git a/xen/include/xen/llc-coloring.h b/xen/include/xen/llc-coloring.h index b96a7134ed..5cb560d75d 100644 --- a/xen/include/xen/llc-coloring.h +++ b/xen/include/xen/llc-coloring.h @@ -24,6 +24,17 @@ static inline void domain_llc_coloring_free(struct domain *d) {} static inline void domain_dump_llc_colors(const struct domain *d) {} #endif +/** + * Iterate over each Xen mfn in the colored space. + * @mfn: the current mfn. The first non-colored mfn must be provided as the + * starting point. + * @i: loop index. + */ +#define for_each_xen_colored_mfn(mfn, i) \ + for ( i = 0, mfn = xen_colored_mfn(mfn); \ + i < (_end - _start) >> PAGE_SHIFT; \ + i++, mfn = xen_colored_mfn(mfn_add(mfn, 1)) ) + unsigned int get_llc_way_size(void); void arch_llc_coloring_init(void); int dom0_set_llc_colors(struct domain *d); @@ -35,6 +46,9 @@ struct page_info; unsigned int page_to_llc_color(const struct page_info *pg); unsigned int get_max_nr_llc_colors(void); +paddr_t xen_colored_map_size(void); +mfn_t xen_colored_mfn(mfn_t mfn); + #endif /* __COLORING_H__ */ /*