Message ID | 20240129171811.21382-11-carlo.nonato@minervasys.tech
---|---
State | Superseded
Series | Arm cache coloring
On 29.01.2024 18:18, Carlo Nonato wrote: > @@ -157,7 +158,11 @@ > #define PGC_static 0 > #endif > > -#define PGC_preserved (PGC_extra | PGC_static) > +#ifndef PGC_colored > +#define PGC_colored 0 > +#endif > + > +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored) > > #ifndef PGT_TYPE_INFO_INITIALIZER > #define PGT_TYPE_INFO_INITIALIZER 0 > @@ -1504,6 +1509,7 @@ static void free_heap_pages( > if ( !mfn_valid(page_to_mfn(predecessor)) || > !page_state_is(predecessor, free) || > (predecessor->count_info & PGC_static) || > + (predecessor->count_info & PGC_colored) || How about a 2nd #define (e.g. PGC_no_buddy_merge) to use here and ... > (PFN_ORDER(predecessor) != order) || > (page_to_nid(predecessor) != node) ) > break; > @@ -1528,6 +1534,7 @@ static void free_heap_pages( > if ( !mfn_valid(page_to_mfn(successor)) || > !page_state_is(successor, free) || > (successor->count_info & PGC_static) || > + (successor->count_info & PGC_colored) || ... here? That'll then also avoid me commenting that I don't see why these two PGC_* checks aren't folded. > @@ -1943,6 +1950,161 @@ static unsigned long avail_heap_pages( > return free_pages; > } > > +/************************* > + * COLORED SIDE-ALLOCATOR > + * > + * Pages are grouped by LLC color in lists which are globally referred to as the > + * color heap. Lists are populated in end_boot_allocator(). > + * After initialization there will be N lists where N is the number of > + * available colors on the platform. > + */ > +static struct page_list_head *__ro_after_init _color_heap; > +static unsigned long *__ro_after_init free_colored_pages; > + > +/* Memory required for buddy allocator to work with colored one */ > +#ifdef CONFIG_LLC_COLORING > +static unsigned long __initdata buddy_alloc_size = > + MB(CONFIG_BUDDY_ALLOCATOR_SIZE); > +size_param("buddy-alloc-size", buddy_alloc_size); > + > +#define domain_num_llc_colors(d) (d)->num_llc_colors > +#define domain_llc_color(d, i) (d)->llc_colors[(i)] Nit: Unnecessary parentheses inside the square brackets. > +#else > +static unsigned long __initdata buddy_alloc_size; > + > +#define domain_num_llc_colors(d) 0 > +#define domain_llc_color(d, i) 0 > +#endif > + > +#define color_heap(color) (&_color_heap[color]) Wouldn't this better live next to _color_heap()'s definition? > +static void free_color_heap_page(struct page_info *pg, bool need_scrub) > +{ > + unsigned int color = page_to_llc_color(pg); > + struct page_list_head *head = color_heap(color); > + > + spin_lock(&heap_lock); > + > + mark_page_free(pg, page_to_mfn(pg)); > + > + if ( need_scrub ) > + { > + pg->count_info |= PGC_need_scrub; > + poison_one_page(pg); > + } > + > + pg->count_info |= PGC_colored; The page transiently losing PGC_colored is not an issue then (presumably because of holding the heap lock)? Did you consider having mark_page_free() also use PGC_preserved? 
> + free_colored_pages[color]++; > + page_list_add(pg, head); > + > + spin_unlock(&heap_lock); > +} > + > +static struct page_info *alloc_color_heap_page(unsigned int memflags, > + const struct domain *d) > +{ > + struct page_info *pg = NULL; > + unsigned int i, color = 0; > + unsigned long max = 0; > + bool need_tlbflush = false; > + uint32_t tlbflush_timestamp = 0; > + bool scrub = !(memflags & MEMF_no_scrub); > + > + spin_lock(&heap_lock); > + > + for ( i = 0; i < domain_num_llc_colors(d); i++ ) > + { > + unsigned long free = free_colored_pages[domain_llc_color(d, i)]; > + > + if ( free > max ) > + { > + color = domain_llc_color(d, i); > + pg = page_list_first(color_heap(color)); > + max = free; > + } > + } The apporach is likely fine at least initially, but: By going from where the most free pages are, you're not necessarily evenly distributing a domain's pages over the colors it may use, unless the domain uses its set of colors exclusively. > + if ( !pg ) > + { > + spin_unlock(&heap_lock); > + return NULL; > + } > + > + pg->count_info = PGC_state_inuse | PGC_colored | > + (pg->count_info & PGC_need_scrub); Isn't PGC_colored already set on the page? Together with ... > + free_colored_pages[color]--; > + page_list_del(pg, color_heap(color)); > + > + if ( !(memflags & MEMF_no_tlbflush) ) > + accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp); > + > + init_free_page_fields(pg); > + > + spin_unlock(&heap_lock); > + > + if ( test_and_clear_bit(_PGC_need_scrub, &pg->count_info) && scrub ) ... this, can't the above be simplified? > + scrub_one_page(pg); > + else if ( scrub ) > + check_one_page(pg); > + > + if ( need_tlbflush ) > + filtered_flush_tlb_mask(tlbflush_timestamp); > + > + flush_page_to_ram(mfn_x(page_to_mfn(pg)), > + !(memflags & MEMF_no_icache_flush)); > + > + return pg; > +} > + > +static void __init init_color_heap_pages(struct page_info *pg, > + unsigned long nr_pages) > +{ > + unsigned int i; > + bool need_scrub = opt_bootscrub == BOOTSCRUB_IDLE; > + > + if ( buddy_alloc_size ) > + { > + unsigned long buddy_pages = min(PFN_DOWN(buddy_alloc_size), nr_pages); > + > + init_heap_pages(pg, buddy_pages); > + nr_pages -= buddy_pages; > + buddy_alloc_size -= buddy_pages << PAGE_SHIFT; > + pg += buddy_pages; > + } > + > + if ( !_color_heap ) > + { > + unsigned int max_nr_colors = get_max_nr_llc_colors(); > + > + _color_heap = xmalloc_array(struct page_list_head, max_nr_colors); > + free_colored_pages = xzalloc_array(unsigned long, max_nr_colors); > + if ( !_color_heap || !free_colored_pages ) > + panic("Can't allocate colored heap. Buddy reserved size is too low"); > + > + for ( i = 0; i < max_nr_colors; i++ ) > + INIT_PAGE_LIST_HEAD(color_heap(i)); > + } > + > + if ( nr_pages ) > + printk(XENLOG_DEBUG > + "Init color heap with %lu pages, start MFN: %"PRI_mfn"\n", > + nr_pages, mfn_x(page_to_mfn(pg))); This message can be issued more than once. Is that really desirable / necessary? > @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages( > if ( memflags & MEMF_no_owner ) > memflags |= MEMF_no_refcount; > > - if ( !dma_bitsize ) > + /* Only domains are supported for coloring */ > + if ( d && llc_coloring_enabled ) > + { > + /* Colored allocation must be done on 0 order */ > + if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL ) > + return NULL; > + } I think I had asked before: What about MEMF_node() or MEMF_bits() having been used by the caller? Jan
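To make the first suggestion in the review above concrete: a minimal sketch, assuming the PGC_no_buddy_merge name proposed there, of how the folded check could look. This is an illustration, not code from the series.

#ifndef PGC_colored
#define PGC_colored 0
#endif

/* Flags that must prevent a free page from being merged with its buddy. */
#define PGC_no_buddy_merge (PGC_static | PGC_colored)

/* ... and in free_heap_pages() the two per-neighbour checks collapse to: */
if ( !mfn_valid(page_to_mfn(predecessor)) ||
     !page_state_is(predecessor, free) ||
     (predecessor->count_info & PGC_no_buddy_merge) ||
     (PFN_ORDER(predecessor) != order) ||
     (page_to_nid(predecessor) != node) )
    break;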
Hi Jan, On Thu, Feb 1, 2024 at 4:53 PM Jan Beulich <jbeulich@suse.com> wrote: > > On 29.01.2024 18:18, Carlo Nonato wrote: > > @@ -157,7 +158,11 @@ > > #define PGC_static 0 > > #endif > > > > -#define PGC_preserved (PGC_extra | PGC_static) > > +#ifndef PGC_colored > > +#define PGC_colored 0 > > +#endif > > + > > +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored) > > > > #ifndef PGT_TYPE_INFO_INITIALIZER > > #define PGT_TYPE_INFO_INITIALIZER 0 > > @@ -1504,6 +1509,7 @@ static void free_heap_pages( > > if ( !mfn_valid(page_to_mfn(predecessor)) || > > !page_state_is(predecessor, free) || > > (predecessor->count_info & PGC_static) || > > + (predecessor->count_info & PGC_colored) || > > How about a 2nd #define (e.g. PGC_no_buddy_merge) to use here and ... > > > (PFN_ORDER(predecessor) != order) || > > (page_to_nid(predecessor) != node) ) > > break; > > @@ -1528,6 +1534,7 @@ static void free_heap_pages( > > if ( !mfn_valid(page_to_mfn(successor)) || > > !page_state_is(successor, free) || > > (successor->count_info & PGC_static) || > > + (successor->count_info & PGC_colored) || > > ... here? That'll then also avoid me commenting that I don't see why > these two PGC_* checks aren't folded. Yes for me it's ok (I even suggested that in v5). Do you want this change to be merged with the previous patch? Or should they all belong to this one? > > @@ -1943,6 +1950,161 @@ static unsigned long avail_heap_pages( > > return free_pages; > > } > > > > +/************************* > > + * COLORED SIDE-ALLOCATOR > > + * > > + * Pages are grouped by LLC color in lists which are globally referred to as the > > + * color heap. Lists are populated in end_boot_allocator(). > > + * After initialization there will be N lists where N is the number of > > + * available colors on the platform. > > + */ > > +static struct page_list_head *__ro_after_init _color_heap; > > +static unsigned long *__ro_after_init free_colored_pages; > > + > > +/* Memory required for buddy allocator to work with colored one */ > > +#ifdef CONFIG_LLC_COLORING > > +static unsigned long __initdata buddy_alloc_size = > > + MB(CONFIG_BUDDY_ALLOCATOR_SIZE); > > +size_param("buddy-alloc-size", buddy_alloc_size); > > + > > +#define domain_num_llc_colors(d) (d)->num_llc_colors > > +#define domain_llc_color(d, i) (d)->llc_colors[(i)] > > Nit: Unnecessary parentheses inside the square brackets. > > > +#else > > +static unsigned long __initdata buddy_alloc_size; > > + > > +#define domain_num_llc_colors(d) 0 > > +#define domain_llc_color(d, i) 0 > > +#endif > > + > > +#define color_heap(color) (&_color_heap[color]) > > Wouldn't this better live next to _color_heap()'s definition? > > > +static void free_color_heap_page(struct page_info *pg, bool need_scrub) > > +{ > > + unsigned int color = page_to_llc_color(pg); > > + struct page_list_head *head = color_heap(color); > > + > > + spin_lock(&heap_lock); > > + > > + mark_page_free(pg, page_to_mfn(pg)); > > + > > + if ( need_scrub ) > > + { > > + pg->count_info |= PGC_need_scrub; > > + poison_one_page(pg); > > + } > > + > > + pg->count_info |= PGC_colored; > > The page transiently losing PGC_colored is not an issue then (presumably > because of holding the heap lock)? Did you consider having mark_page_free() > also use PGC_preserved? I did something similar to what it's done in unprepare_staticmem_pages(): first mark_page_free() is called and then PGC_static is added to count_info. 
> > + free_colored_pages[color]++; > > + page_list_add(pg, head); > > + > > + spin_unlock(&heap_lock); > > +} > > + > > +static struct page_info *alloc_color_heap_page(unsigned int memflags, > > + const struct domain *d) > > +{ > > + struct page_info *pg = NULL; > > + unsigned int i, color = 0; > > + unsigned long max = 0; > > + bool need_tlbflush = false; > > + uint32_t tlbflush_timestamp = 0; > > + bool scrub = !(memflags & MEMF_no_scrub); > > + > > + spin_lock(&heap_lock); > > + > > + for ( i = 0; i < domain_num_llc_colors(d); i++ ) > > + { > > + unsigned long free = free_colored_pages[domain_llc_color(d, i)]; > > + > > + if ( free > max ) > > + { > > + color = domain_llc_color(d, i); > > + pg = page_list_first(color_heap(color)); > > + max = free; > > + } > > + } > > The apporach is likely fine at least initially, but: By going from where > the most free pages are, you're not necessarily evenly distributing a > domain's pages over the colors it may use, unless the domain uses its > set of colors exclusively. We don't find it problematic since the exclusive set of colors are to be preferred (as per the documentation). > > + if ( !pg ) > > + { > > + spin_unlock(&heap_lock); > > + return NULL; > > + } > > + > > + pg->count_info = PGC_state_inuse | PGC_colored | > > + (pg->count_info & PGC_need_scrub); > > Isn't PGC_colored already set on the page? Yes but here I need to make sure that only PGC_state_inuse | PGC_colored are used so an assignment seems legit. > Together with ... > > > + free_colored_pages[color]--; > > + page_list_del(pg, color_heap(color)); > > + > > + if ( !(memflags & MEMF_no_tlbflush) ) > > + accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp); > > + > > + init_free_page_fields(pg); > > + > > + spin_unlock(&heap_lock); > > + > > + if ( test_and_clear_bit(_PGC_need_scrub, &pg->count_info) && scrub ) > > ... this, can't the above be simplified? I tried to replicate what happens in alloc_heap_pages() where: > /* Preserve PGC_need_scrub so we can check it after lock is dropped. */ > pg[i].count_info = PGC_state_inuse | (pg[i].count_info & PGC_need_scrub); and then after the unlock the bit is tested. > > + scrub_one_page(pg); > > + else if ( scrub ) > > + check_one_page(pg); > > + > > + if ( need_tlbflush ) > > + filtered_flush_tlb_mask(tlbflush_timestamp); > > + > > + flush_page_to_ram(mfn_x(page_to_mfn(pg)), > > + !(memflags & MEMF_no_icache_flush)); > > + > > + return pg; > > +} > > + > > +static void __init init_color_heap_pages(struct page_info *pg, > > + unsigned long nr_pages) > > +{ > > + unsigned int i; > > + bool need_scrub = opt_bootscrub == BOOTSCRUB_IDLE; > > + > > + if ( buddy_alloc_size ) > > + { > > + unsigned long buddy_pages = min(PFN_DOWN(buddy_alloc_size), nr_pages); > > + > > + init_heap_pages(pg, buddy_pages); > > + nr_pages -= buddy_pages; > > + buddy_alloc_size -= buddy_pages << PAGE_SHIFT; > > + pg += buddy_pages; > > + } > > + > > + if ( !_color_heap ) > > + { > > + unsigned int max_nr_colors = get_max_nr_llc_colors(); > > + > > + _color_heap = xmalloc_array(struct page_list_head, max_nr_colors); > > + free_colored_pages = xzalloc_array(unsigned long, max_nr_colors); > > + if ( !_color_heap || !free_colored_pages ) > > + panic("Can't allocate colored heap. 
Buddy reserved size is too low"); > > + > > + for ( i = 0; i < max_nr_colors; i++ ) > > + INIT_PAGE_LIST_HEAD(color_heap(i)); > > + } > > + > > + if ( nr_pages ) > > + printk(XENLOG_DEBUG > > + "Init color heap with %lu pages, start MFN: %"PRI_mfn"\n", > > + nr_pages, mfn_x(page_to_mfn(pg))); > > This message can be issued more than once. Is that really desirable / > necessary? No I can drop it. > > @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages( > > if ( memflags & MEMF_no_owner ) > > memflags |= MEMF_no_refcount; > > > > - if ( !dma_bitsize ) > > + /* Only domains are supported for coloring */ > > + if ( d && llc_coloring_enabled ) > > + { > > + /* Colored allocation must be done on 0 order */ > > + if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL ) > > + return NULL; > > + } > > I think I had asked before: What about MEMF_node() or MEMF_bits() > having been used by the caller? MEMF_node() is used for NUMA, right? I think that for the moment, since cache coloring is supported only on arm64 and NUMA is not yet supported, it's safe to ignore it. I'm not sure I understood what MEMF_bits() is for. It restricts the allocator to return pages in some range, right? Thanks. > Jan
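On the MEMF_bits() question at the end of the reply above: it encodes an upper bound on the physical address width the caller can accept. A rough sketch of the kind of predicate an allocator would have to honour; the helper below is an illustrative assumption, not one of Xen's actual macros.

/* Illustration only: a page can satisfy a MEMF_bits(b) request when its
 * machine address fits within b bits. */
static bool page_fits_address_bits(const struct page_info *pg,
                                   unsigned int address_bits)
{
    if ( !address_bits )                      /* no restriction requested */
        return true;

    if ( address_bits <= PAGE_SHIFT )
        return false;

    return (mfn_x(page_to_mfn(pg)) >> (address_bits - PAGE_SHIFT)) == 0;
}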
On 05.02.2024 12:59, Carlo Nonato wrote: > On Thu, Feb 1, 2024 at 4:53 PM Jan Beulich <jbeulich@suse.com> wrote: >> On 29.01.2024 18:18, Carlo Nonato wrote: >>> @@ -157,7 +158,11 @@ >>> #define PGC_static 0 >>> #endif >>> >>> -#define PGC_preserved (PGC_extra | PGC_static) >>> +#ifndef PGC_colored >>> +#define PGC_colored 0 >>> +#endif >>> + >>> +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored) >>> >>> #ifndef PGT_TYPE_INFO_INITIALIZER >>> #define PGT_TYPE_INFO_INITIALIZER 0 >>> @@ -1504,6 +1509,7 @@ static void free_heap_pages( >>> if ( !mfn_valid(page_to_mfn(predecessor)) || >>> !page_state_is(predecessor, free) || >>> (predecessor->count_info & PGC_static) || >>> + (predecessor->count_info & PGC_colored) || >> >> How about a 2nd #define (e.g. PGC_no_buddy_merge) to use here and ... >> >>> (PFN_ORDER(predecessor) != order) || >>> (page_to_nid(predecessor) != node) ) >>> break; >>> @@ -1528,6 +1534,7 @@ static void free_heap_pages( >>> if ( !mfn_valid(page_to_mfn(successor)) || >>> !page_state_is(successor, free) || >>> (successor->count_info & PGC_static) || >>> + (successor->count_info & PGC_colored) || >> >> ... here? That'll then also avoid me commenting that I don't see why >> these two PGC_* checks aren't folded. > > Yes for me it's ok (I even suggested that in v5). Do you want this change to > be merged with the previous patch? Or should they all belong to this one? Or make yet another small prereq patch? >>> +static void free_color_heap_page(struct page_info *pg, bool need_scrub) >>> +{ >>> + unsigned int color = page_to_llc_color(pg); >>> + struct page_list_head *head = color_heap(color); >>> + >>> + spin_lock(&heap_lock); >>> + >>> + mark_page_free(pg, page_to_mfn(pg)); >>> + >>> + if ( need_scrub ) >>> + { >>> + pg->count_info |= PGC_need_scrub; >>> + poison_one_page(pg); >>> + } >>> + >>> + pg->count_info |= PGC_colored; >> >> The page transiently losing PGC_colored is not an issue then (presumably >> because of holding the heap lock)? Did you consider having mark_page_free() >> also use PGC_preserved? > > I did something similar to what it's done in unprepare_staticmem_pages(): > first mark_page_free() is called and then PGC_static is added to count_info. I had guessed this would have been served as reference, yet by saying what you did you don't really answer my question. (Really I'm not entirely sure the similar staticmem behavior is entirely correct.) >>> + if ( !pg ) >>> + { >>> + spin_unlock(&heap_lock); >>> + return NULL; >>> + } >>> + >>> + pg->count_info = PGC_state_inuse | PGC_colored | >>> + (pg->count_info & PGC_need_scrub); >> >> Isn't PGC_colored already set on the page? > > Yes but here I need to make sure that only PGC_state_inuse | PGC_colored are > used so an assignment seems legit. Well, yes, you won't be able to avoid an assignment, but couldn't you preserve PGC_colored just like PGC_need_scrub is preserved? Or else at least assert the flag is set, to avoid giving the impression that right here pages can suddenly become "colored" ones? Then again them becoming so _may_ be needed during initialization. >> Together with ... >> >>> + free_colored_pages[color]--; >>> + page_list_del(pg, color_heap(color)); >>> + >>> + if ( !(memflags & MEMF_no_tlbflush) ) >>> + accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp); >>> + >>> + init_free_page_fields(pg); >>> + >>> + spin_unlock(&heap_lock); >>> + >>> + if ( test_and_clear_bit(_PGC_need_scrub, &pg->count_info) && scrub ) >> >> ... this, can't the above be simplified? 
> > I tried to replicate what happens in alloc_heap_pages() where: > >> /* Preserve PGC_need_scrub so we can check it after lock is dropped. */ >> pg[i].count_info = PGC_state_inuse | (pg[i].count_info & PGC_need_scrub); > > and then after the unlock the bit is tested. Again I was indeed assuming that you used existing code as reference. Yet again my question was whether that's actually what is needed here (which then may or may not mean that it could be done differently also there). >>> @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages( >>> if ( memflags & MEMF_no_owner ) >>> memflags |= MEMF_no_refcount; >>> >>> - if ( !dma_bitsize ) >>> + /* Only domains are supported for coloring */ >>> + if ( d && llc_coloring_enabled ) >>> + { >>> + /* Colored allocation must be done on 0 order */ >>> + if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL ) >>> + return NULL; >>> + } >> >> I think I had asked before: What about MEMF_node() or MEMF_bits() >> having been used by the caller? > > MEMF_node() is used for NUMA, right? I think that for the moment, since cache > coloring is supported only on arm64 and NUMA is not yet supported, it's safe > to ignore it. NUMA or not, I'm wary of silent ignoring of inputs. I'd rather request that, just like with staticmem, precautions are then taken to avoid coloring and NUMA to be used together. Recall that much like your work, work to support NUMA is also in progress (unless it was canceled, but not so announced). > I'm not sure I understood what MEMF_bits() is for. It restricts the allocator > to return pages in some range, right? Correct. Jan
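A short sketch of the two alternatives outlined above for the count_info assignment in alloc_color_heap_page(); both are illustrations, not code from the series.

/* (a) preserve PGC_colored the same way PGC_need_scrub is preserved: */
pg->count_info = PGC_state_inuse |
                 (pg->count_info & (PGC_colored | PGC_need_scrub));

/* (b) or keep the plain assignment, but make the expectation explicit: */
ASSERT(pg->count_info & PGC_colored);
pg->count_info = PGC_state_inuse | PGC_colored |
                 (pg->count_info & PGC_need_scrub);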
Hi Jan, On Mon, Feb 5, 2024 at 1:38 PM Jan Beulich <jbeulich@suse.com> wrote: > > On 05.02.2024 12:59, Carlo Nonato wrote: > > On Thu, Feb 1, 2024 at 4:53 PM Jan Beulich <jbeulich@suse.com> wrote: > >> On 29.01.2024 18:18, Carlo Nonato wrote: > >>> @@ -157,7 +158,11 @@ > >>> #define PGC_static 0 > >>> #endif > >>> > >>> -#define PGC_preserved (PGC_extra | PGC_static) > >>> +#ifndef PGC_colored > >>> +#define PGC_colored 0 > >>> +#endif > >>> + > >>> +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored) > >>> > >>> #ifndef PGT_TYPE_INFO_INITIALIZER > >>> #define PGT_TYPE_INFO_INITIALIZER 0 > >>> @@ -1504,6 +1509,7 @@ static void free_heap_pages( > >>> if ( !mfn_valid(page_to_mfn(predecessor)) || > >>> !page_state_is(predecessor, free) || > >>> (predecessor->count_info & PGC_static) || > >>> + (predecessor->count_info & PGC_colored) || > >> > >> How about a 2nd #define (e.g. PGC_no_buddy_merge) to use here and ... > >> > >>> (PFN_ORDER(predecessor) != order) || > >>> (page_to_nid(predecessor) != node) ) > >>> break; > >>> @@ -1528,6 +1534,7 @@ static void free_heap_pages( > >>> if ( !mfn_valid(page_to_mfn(successor)) || > >>> !page_state_is(successor, free) || > >>> (successor->count_info & PGC_static) || > >>> + (successor->count_info & PGC_colored) || > >> > >> ... here? That'll then also avoid me commenting that I don't see why > >> these two PGC_* checks aren't folded. > > > > Yes for me it's ok (I even suggested that in v5). Do you want this change to > > be merged with the previous patch? Or should they all belong to this one? > > Or make yet another small prereq patch? Ok. > >>> +static void free_color_heap_page(struct page_info *pg, bool need_scrub) > >>> +{ > >>> + unsigned int color = page_to_llc_color(pg); > >>> + struct page_list_head *head = color_heap(color); > >>> + > >>> + spin_lock(&heap_lock); > >>> + > >>> + mark_page_free(pg, page_to_mfn(pg)); > >>> + > >>> + if ( need_scrub ) > >>> + { > >>> + pg->count_info |= PGC_need_scrub; > >>> + poison_one_page(pg); > >>> + } > >>> + > >>> + pg->count_info |= PGC_colored; > >> > >> The page transiently losing PGC_colored is not an issue then (presumably > >> because of holding the heap lock)? Did you consider having mark_page_free() > >> also use PGC_preserved? > > > > I did something similar to what it's done in unprepare_staticmem_pages(): > > first mark_page_free() is called and then PGC_static is added to count_info. > > I had guessed this would have been served as reference, yet by saying what > you did you don't really answer my question. (Really I'm not entirely sure > the similar staticmem behavior is entirely correct.) I think holding the heap lock allows writing pg->count_info. Anyway, yes, I can use PGC_preserved in mark_page_free(), but then pages must be correctly initialized with PGC_colored elsewhere. > >>> + if ( !pg ) > >>> + { > >>> + spin_unlock(&heap_lock); > >>> + return NULL; > >>> + } > >>> + > >>> + pg->count_info = PGC_state_inuse | PGC_colored | > >>> + (pg->count_info & PGC_need_scrub); > >> > >> Isn't PGC_colored already set on the page? > > > > Yes but here I need to make sure that only PGC_state_inuse | PGC_colored are > > used so an assignment seems legit. > > Well, yes, you won't be able to avoid an assignment, but couldn't you > preserve PGC_colored just like PGC_need_scrub is preserved? Or else > at least assert the flag is set, to avoid giving the impression that > right here pages can suddenly become "colored" ones? Then again them > becoming so _may_ be needed during initialization. 
Yeah, now I understood. > > >> Together with ... > >> > >>> + free_colored_pages[color]--; > >>> + page_list_del(pg, color_heap(color)); > >>> + > >>> + if ( !(memflags & MEMF_no_tlbflush) ) > >>> + accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp); > >>> + > >>> + init_free_page_fields(pg); > >>> + > >>> + spin_unlock(&heap_lock); > >>> + > >>> + if ( test_and_clear_bit(_PGC_need_scrub, &pg->count_info) && scrub ) > >> > >> ... this, can't the above be simplified? > > > > I tried to replicate what happens in alloc_heap_pages() where: > > > >> /* Preserve PGC_need_scrub so we can check it after lock is dropped. */ > >> pg[i].count_info = PGC_state_inuse | (pg[i].count_info & PGC_need_scrub); > > > > and then after the unlock the bit is tested. > > Again I was indeed assuming that you used existing code as reference. > Yet again my question was whether that's actually what is needed here > (which then may or may not mean that it could be done differently > also there). Ok. Probably isn't needed since we are referring to a single page. So I can first test for the bit, save the result in a variable and remove the preservation, then later use the variable instead of test_and_clear_bit(). > >>> @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages( > >>> if ( memflags & MEMF_no_owner ) > >>> memflags |= MEMF_no_refcount; > >>> > >>> - if ( !dma_bitsize ) > >>> + /* Only domains are supported for coloring */ > >>> + if ( d && llc_coloring_enabled ) > >>> + { > >>> + /* Colored allocation must be done on 0 order */ > >>> + if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL ) > >>> + return NULL; > >>> + } > >> > >> I think I had asked before: What about MEMF_node() or MEMF_bits() > >> having been used by the caller? > > > > MEMF_node() is used for NUMA, right? I think that for the moment, since cache > > coloring is supported only on arm64 and NUMA is not yet supported, it's safe > > to ignore it. > > NUMA or not, I'm wary of silent ignoring of inputs. I'd rather request > that, just like with staticmem, precautions are then taken to avoid > coloring and NUMA to be used together. Recall that much like your work, > work to support NUMA is also in progress (unless it was canceled, but > not so announced). Ok. > > I'm not sure I understood what MEMF_bits() is for. It restricts the allocator > > to return pages in some range, right? > > Correct. So I need to think about this, since I see no easy way of supporting such a thing with the current colored allocator implementation. Do you have any suggestions? Thanks. > Jan
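A rough sketch of the simplification described above, assuming the need-scrub state is latched while the heap lock is still held; again an illustration, not code from the series.

bool need_scrub_page = pg->count_info & PGC_need_scrub;  /* still under heap_lock */

pg->count_info = PGC_state_inuse | PGC_colored;          /* bit no longer preserved */
/* ... */
spin_unlock(&heap_lock);

if ( scrub )
{
    if ( need_scrub_page )
        scrub_one_page(pg);
    else
        check_one_page(pg);
}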
On 05.02.2024 15:10, Carlo Nonato wrote: > On Mon, Feb 5, 2024 at 1:38 PM Jan Beulich <jbeulich@suse.com> wrote: >> On 05.02.2024 12:59, Carlo Nonato wrote: >>> On Thu, Feb 1, 2024 at 4:53 PM Jan Beulich <jbeulich@suse.com> wrote: >>>> On 29.01.2024 18:18, Carlo Nonato wrote: >>>>> @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages( >>>>> if ( memflags & MEMF_no_owner ) >>>>> memflags |= MEMF_no_refcount; >>>>> >>>>> - if ( !dma_bitsize ) >>>>> + /* Only domains are supported for coloring */ >>>>> + if ( d && llc_coloring_enabled ) >>>>> + { >>>>> + /* Colored allocation must be done on 0 order */ >>>>> + if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL ) >>>>> + return NULL; >>>>> + } >>>> >>>> I think I had asked before: What about MEMF_node() or MEMF_bits() >>>> having been used by the caller? >>> >>> MEMF_node() is used for NUMA, right? I think that for the moment, since cache >>> coloring is supported only on arm64 and NUMA is not yet supported, it's safe >>> to ignore it. >> >> NUMA or not, I'm wary of silent ignoring of inputs. I'd rather request >> that, just like with staticmem, precautions are then taken to avoid >> coloring and NUMA to be used together. Recall that much like your work, >> work to support NUMA is also in progress (unless it was canceled, but >> not so announced). > > Ok. > >>> I'm not sure I understood what MEMF_bits() is for. It restricts the allocator >>> to return pages in some range, right? >> >> Correct. > > So I need to think about this, since I see no easy way of supporting such a > thing with the current colored allocator implementation. > Do you have any suggestions? Well, at least properly fail requests you can't fulfill. Jan
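For what "properly fail" could mean at the alloc_domheap_pages() call site, a hypothetical sketch; memflags_node_specified() and memflags_address_bits() are made-up helper names standing in for the real memflags decoding.

if ( d && llc_coloring_enabled )
{
    /* The colored allocator only serves order-0, unrestricted requests. */
    if ( order ||
         memflags_node_specified(memflags) ||   /* MEMF_node() was used */
         memflags_address_bits(memflags) )      /* MEMF_bits() was used */
        return NULL;

    pg = alloc_color_heap_page(memflags, d);
    if ( pg == NULL )
        return NULL;
}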
diff --git a/docs/misc/cache-coloring.rst b/docs/misc/cache-coloring.rst index bf055d7e7f..db1a719698 100644 --- a/docs/misc/cache-coloring.rst +++ b/docs/misc/cache-coloring.rst @@ -9,6 +9,9 @@ To compile LLC coloring support set ``CONFIG_LLC_COLORING=y``. If needed, change the maximum number of colors with ``CONFIG_NR_LLC_COLORS=<n>``. +If needed, change the buddy allocator reserved size with +``CONFIG_BUDDY_ALLOCATOR_SIZE=<n>``. + Compile Xen and the toolstack and then configure it via `Command line parameters`_. For DomUs follow `DomUs configuration`_. @@ -87,6 +90,8 @@ More specific documentation is available at `docs/misc/xen-command-line.pandoc`. +----------------------+-------------------------------+ | ``dom0-llc-colors`` | Dom0 color configuration | +----------------------+-------------------------------+ +| ``buddy-alloc-size`` | Buddy allocator reserved size | ++----------------------+-------------------------------+ Colors selection format *********************** @@ -161,6 +166,17 @@ DomUs colors can be set via Device Tree, also for Dom0less configurations **Note:** If no color configuration is provided for a domain, the default one, which corresponds to all available colors is used instead. +Colored allocator and buddy allocator +************************************* + +The colored allocator distributes pages based on color configurations of +domains so that each domains only gets pages of its own colors. +The colored allocator is meant as an alternative to the buddy allocator because +its allocation policy is by definition incompatible with the generic one. Since +the Xen heap is not colored yet, we need to support the coexistence of the two +allocators and some memory must be left for the buddy one. Buddy memory +reservation is configured via Kconfig or via command-line. + Known issues and limitations **************************** @@ -171,3 +187,24 @@ In the domain configuration, "xen,static-mem" allows memory to be statically allocated to the domain. This isn't possible when LLC coloring is enabled, because that memory can't be guaranteed to use only colors assigned to the domain. + +Cache coloring is intended only for embedded systems +#################################################### + +The current implementation aims to satisfy the need of predictability in +embedded systems with small amount of memory to be managed in a colored way. +Given that, some shortcuts are taken in the development. Expect worse +performances on larger systems. + +Colored allocator can only make use of order-0 pages +#################################################### + +The cache coloring technique relies on memory mappings and on the smallest +amount of memory that can be mapped to achieve the maximum number of colors +(cache partitions) possible. This amount is what is normally called a page and, +in Xen terminology, the order-0 page is the smallest one. The fairly simple +colored allocator currently implemented, makes use only of such pages. +It must be said that a more complex one could, in theory, adopt higher order +pages if the colors selection contained adjacent colors. Two subsequent colors, +for example, can be represented by an order-1 page, four colors correspond to +an order-2 page, etc. diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc index 25da997b5b..d52d38b97a 100644 --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -270,6 +270,20 @@ and not running softirqs. 
Reduce this if softirqs are not being run frequently enough. Setting this to a high value may cause boot failure, particularly if the NMI watchdog is also enabled. +### buddy-alloc-size (arm64) +> `= <size>` + +> Default: `64M` + +Amount of memory reserved for the buddy allocator when colored allocator is +active. This options is parsed only when LLC coloring support is enabled. +The colored allocator is meant as an alternative to the buddy allocator, +because its allocation policy is by definition incompatible with the generic +one. Since the Xen heap systems is not colored yet, we need to support the +coexistence of the two allocators for now. This parameter, which is optional +and for expert only, it's used to set the amount of memory reserved to the +buddy allocator. + ### cet = List of [ shstk=<bool>, ibt=<bool> ] diff --git a/xen/arch/Kconfig b/xen/arch/Kconfig index c1157bcbcb..b50082eb84 100644 --- a/xen/arch/Kconfig +++ b/xen/arch/Kconfig @@ -48,3 +48,11 @@ config NR_LLC_COLORS bound. The runtime value is autocomputed or manually set via cmdline. The default value corresponds to an 8 MiB 16-ways LLC, which should be more than what needed in the general case. + +config BUDDY_ALLOCATOR_SIZE + int "Buddy allocator reserved memory size (MiB)" + default "64" + depends on LLC_COLORING + help + Amount of memory reserved for the buddy allocator to serve Xen heap, + working alongside the colored one. diff --git a/xen/arch/arm/include/asm/mm.h b/xen/arch/arm/include/asm/mm.h index cbcf3bf147..1829c559d6 100644 --- a/xen/arch/arm/include/asm/mm.h +++ b/xen/arch/arm/include/asm/mm.h @@ -145,6 +145,11 @@ struct page_info #else #define PGC_static 0 #endif +#ifdef CONFIG_LLC_COLORING +/* Page is cache colored */ +#define _PGC_colored PG_shift(4) +#define PGC_colored PG_mask(1, 4) +#endif /* ... */ /* Page is broken? */ #define _PGC_broken PG_shift(7) diff --git a/xen/common/llc-coloring.c b/xen/common/llc-coloring.c index a932a61e0c..25e0861733 100644 --- a/xen/common/llc-coloring.c +++ b/xen/common/llc-coloring.c @@ -21,6 +21,9 @@ static unsigned int __ro_after_init max_nr_colors = CONFIG_NR_LLC_COLORS; static unsigned int __initdata dom0_colors[CONFIG_NR_LLC_COLORS]; static unsigned int __initdata dom0_num_colors; +#define mfn_color_mask (max_nr_colors - 1) +#define mfn_to_color(mfn) (mfn_x(mfn) & mfn_color_mask) + /* * Parse the coloring configuration given in the buf string, following the * syntax below. 
@@ -277,6 +280,16 @@ int domain_set_llc_colors_from_str(struct domain *d, const char *str) return domain_check_colors(d); } +unsigned int page_to_llc_color(const struct page_info *pg) +{ + return mfn_to_color(page_to_mfn(pg)); +} + +unsigned int get_max_nr_llc_colors(void) +{ + return max_nr_colors; +} + /* * Local variables: * mode: C diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 7c135e0bb4..469d78869f 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -126,6 +126,7 @@ #include <xen/irq.h> #include <xen/keyhandler.h> #include <xen/lib.h> +#include <xen/llc-coloring.h> #include <xen/mm.h> #include <xen/nodemask.h> #include <xen/numa.h> @@ -157,7 +158,11 @@ #define PGC_static 0 #endif -#define PGC_preserved (PGC_extra | PGC_static) +#ifndef PGC_colored +#define PGC_colored 0 +#endif + +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored) #ifndef PGT_TYPE_INFO_INITIALIZER #define PGT_TYPE_INFO_INITIALIZER 0 @@ -1504,6 +1509,7 @@ static void free_heap_pages( if ( !mfn_valid(page_to_mfn(predecessor)) || !page_state_is(predecessor, free) || (predecessor->count_info & PGC_static) || + (predecessor->count_info & PGC_colored) || (PFN_ORDER(predecessor) != order) || (page_to_nid(predecessor) != node) ) break; @@ -1528,6 +1534,7 @@ static void free_heap_pages( if ( !mfn_valid(page_to_mfn(successor)) || !page_state_is(successor, free) || (successor->count_info & PGC_static) || + (successor->count_info & PGC_colored) || (PFN_ORDER(successor) != order) || (page_to_nid(successor) != node) ) break; @@ -1943,6 +1950,161 @@ static unsigned long avail_heap_pages( return free_pages; } +/************************* + * COLORED SIDE-ALLOCATOR + * + * Pages are grouped by LLC color in lists which are globally referred to as the + * color heap. Lists are populated in end_boot_allocator(). + * After initialization there will be N lists where N is the number of + * available colors on the platform. 
+ */ +static struct page_list_head *__ro_after_init _color_heap; +static unsigned long *__ro_after_init free_colored_pages; + +/* Memory required for buddy allocator to work with colored one */ +#ifdef CONFIG_LLC_COLORING +static unsigned long __initdata buddy_alloc_size = + MB(CONFIG_BUDDY_ALLOCATOR_SIZE); +size_param("buddy-alloc-size", buddy_alloc_size); + +#define domain_num_llc_colors(d) (d)->num_llc_colors +#define domain_llc_color(d, i) (d)->llc_colors[(i)] +#else +static unsigned long __initdata buddy_alloc_size; + +#define domain_num_llc_colors(d) 0 +#define domain_llc_color(d, i) 0 +#endif + +#define color_heap(color) (&_color_heap[color]) + +static void free_color_heap_page(struct page_info *pg, bool need_scrub) +{ + unsigned int color = page_to_llc_color(pg); + struct page_list_head *head = color_heap(color); + + spin_lock(&heap_lock); + + mark_page_free(pg, page_to_mfn(pg)); + + if ( need_scrub ) + { + pg->count_info |= PGC_need_scrub; + poison_one_page(pg); + } + + pg->count_info |= PGC_colored; + free_colored_pages[color]++; + page_list_add(pg, head); + + spin_unlock(&heap_lock); +} + +static struct page_info *alloc_color_heap_page(unsigned int memflags, + const struct domain *d) +{ + struct page_info *pg = NULL; + unsigned int i, color = 0; + unsigned long max = 0; + bool need_tlbflush = false; + uint32_t tlbflush_timestamp = 0; + bool scrub = !(memflags & MEMF_no_scrub); + + spin_lock(&heap_lock); + + for ( i = 0; i < domain_num_llc_colors(d); i++ ) + { + unsigned long free = free_colored_pages[domain_llc_color(d, i)]; + + if ( free > max ) + { + color = domain_llc_color(d, i); + pg = page_list_first(color_heap(color)); + max = free; + } + } + + if ( !pg ) + { + spin_unlock(&heap_lock); + return NULL; + } + + pg->count_info = PGC_state_inuse | PGC_colored | + (pg->count_info & PGC_need_scrub); + free_colored_pages[color]--; + page_list_del(pg, color_heap(color)); + + if ( !(memflags & MEMF_no_tlbflush) ) + accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp); + + init_free_page_fields(pg); + + spin_unlock(&heap_lock); + + if ( test_and_clear_bit(_PGC_need_scrub, &pg->count_info) && scrub ) + scrub_one_page(pg); + else if ( scrub ) + check_one_page(pg); + + if ( need_tlbflush ) + filtered_flush_tlb_mask(tlbflush_timestamp); + + flush_page_to_ram(mfn_x(page_to_mfn(pg)), + !(memflags & MEMF_no_icache_flush)); + + return pg; +} + +static void __init init_color_heap_pages(struct page_info *pg, + unsigned long nr_pages) +{ + unsigned int i; + bool need_scrub = opt_bootscrub == BOOTSCRUB_IDLE; + + if ( buddy_alloc_size ) + { + unsigned long buddy_pages = min(PFN_DOWN(buddy_alloc_size), nr_pages); + + init_heap_pages(pg, buddy_pages); + nr_pages -= buddy_pages; + buddy_alloc_size -= buddy_pages << PAGE_SHIFT; + pg += buddy_pages; + } + + if ( !_color_heap ) + { + unsigned int max_nr_colors = get_max_nr_llc_colors(); + + _color_heap = xmalloc_array(struct page_list_head, max_nr_colors); + free_colored_pages = xzalloc_array(unsigned long, max_nr_colors); + if ( !_color_heap || !free_colored_pages ) + panic("Can't allocate colored heap. 
Buddy reserved size is too low"); + + for ( i = 0; i < max_nr_colors; i++ ) + INIT_PAGE_LIST_HEAD(color_heap(i)); + } + + if ( nr_pages ) + printk(XENLOG_DEBUG + "Init color heap with %lu pages, start MFN: %"PRI_mfn"\n", + nr_pages, mfn_x(page_to_mfn(pg))); + + for ( i = 0; i < nr_pages; i++ ) + free_color_heap_page(&pg[i], need_scrub); +} + +static void dump_color_heap(void) +{ + unsigned int color; + + printk("Dumping color heap info\n"); + for ( color = 0; color < get_max_nr_llc_colors(); color++ ) + if ( free_colored_pages[color] > 0 ) + printk("Color heap[%u]: %lu pages\n", + color, free_colored_pages[color]); +} + void __init end_boot_allocator(void) { unsigned int i; @@ -1962,7 +2124,13 @@ void __init end_boot_allocator(void) for ( i = nr_bootmem_regions; i-- > 0; ) { struct bootmem_region *r = &bootmem_region_list[i]; - if ( r->s < r->e ) + + if ( r->s >= r->e ) + continue; + + if ( llc_coloring_enabled ) + init_color_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s); + else init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s); } nr_bootmem_regions = 0; @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages( if ( memflags & MEMF_no_owner ) memflags |= MEMF_no_refcount; - if ( !dma_bitsize ) + /* Only domains are supported for coloring */ + if ( d && llc_coloring_enabled ) + { + /* Colored allocation must be done on 0 order */ + if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL ) + return NULL; + } + else if ( !dma_bitsize ) memflags &= ~MEMF_no_dma; else if ( (dma_zone = bits_to_zone(dma_bitsize)) < zone_hi ) pg = alloc_heap_pages(dma_zone + 1, zone_hi, order, memflags, d); @@ -2483,7 +2658,10 @@ struct page_info *alloc_domheap_pages( } if ( assign_page(pg, order, d, memflags) ) { - free_heap_pages(pg, order, memflags & MEMF_no_scrub); + if ( pg->count_info & PGC_colored ) + free_color_heap_page(pg, memflags & MEMF_no_scrub); + else + free_heap_pages(pg, order, memflags & MEMF_no_scrub); return NULL; } } @@ -2566,7 +2744,10 @@ void free_domheap_pages(struct page_info *pg, unsigned int order) scrub = 1; } - free_heap_pages(pg, order, scrub); + if ( pg->count_info & PGC_colored ) + free_color_heap_page(pg, scrub); + else + free_heap_pages(pg, order, scrub); } if ( drop_dom_ref ) @@ -2673,6 +2854,9 @@ static void cf_check dump_heap(unsigned char key) continue; printk("Node %d has %lu unscrubbed pages\n", i, node_need_scrub[i]); } + + if ( llc_coloring_enabled ) + dump_color_heap(); } static __init int cf_check register_heap_trigger(void) diff --git a/xen/include/xen/llc-coloring.h b/xen/include/xen/llc-coloring.h index 63785c8319..b96a7134ed 100644 --- a/xen/include/xen/llc-coloring.h +++ b/xen/include/xen/llc-coloring.h @@ -31,6 +31,10 @@ int domain_set_llc_colors_from_str(struct domain *d, const char *str); int domain_set_llc_colors_domctl(struct domain *d, const struct xen_domctl_set_llc_colors *config); +struct page_info; +unsigned int page_to_llc_color(const struct page_info *pg); +unsigned int get_max_nr_llc_colors(void); + #endif /* __COLORING_H__ */ /*
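As a closing note on the mapping the patch relies on (mfn_to_color() in llc-coloring.c above): a small standalone sketch, with an arbitrary color count picked purely for illustration, of how the power-of-two mask spreads consecutive page frames across LLC colors.

#include <stdio.h>

int main(void)
{
    /* Illustrative value only; the real count comes from the LLC geometry
     * or CONFIG_NR_LLC_COLORS and must be a power of two for the mask. */
    const unsigned long max_nr_colors = 16;
    const unsigned long mfn_color_mask = max_nr_colors - 1;

    for ( unsigned long mfn = 0x40000; mfn < 0x40000 + 8; mfn++ )
        printf("mfn %#lx -> LLC color %lu\n", mfn, mfn & mfn_color_mask);

    return 0;
}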