Message ID | cefeb63173fa0fac7543315a2abbd4b5a1b25af8.1655242024.git.tim.c.chen@linux.intel.com (mailing list archive) |
---|---|
State | New |
Series | Cgroup accounting of memory tier usage |
(Resend in plain text. Sorry.)

On Tue, Jun 14, 2022 at 3:26 PM Tim Chen <tim.c.chen@linux.intel.com> wrote:
>
> If we need to restrict toptier memory usage for a cgroup,
> we need to retrieve usage of toptier memory efficiently.
> Add a page counter to track toptier memory usage directly
> so its value can be returned right away.
> ---
>  include/linux/memcontrol.h |  1 +
>  mm/memcontrol.c            | 50 ++++++++++++++++++++++++++++++++------
>  2 files changed, 43 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 9ecead1042b9..b4f727cba1de 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -241,6 +241,7 @@ struct mem_cgroup {
>
>         /* Accounted resources */
>         struct page_counter memory;             /* Both v1 & v2 */
> +       struct page_counter toptier;
>
>         union {
>                 struct page_counter swap;       /* v2 only */
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2f6e95e6d200..2f20ec2712b8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -848,6 +848,23 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
>         __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);
>  }
>
> +static inline void mem_cgroup_charge_toptier(struct mem_cgroup *memcg,
> +                                            int nid,
> +                                            int nr_pages)
> +{
> +       if (!node_is_toptier(nid) || !memcg)
> +               return;
> +
> +       if (nr_pages >= 0) {
> +               page_counter_charge(&memcg->toptier,
> +                                   (unsigned long) nr_pages);
> +       } else {
> +               nr_pages = -nr_pages;
> +               page_counter_uncharge(&memcg->toptier,
> +                                     (unsigned long) nr_pages);
> +       }
> +}
> +

When we don't know which pages are being charged, we should still
charge the usage to toptier (assuming that toptier always includes the
default tier), e.g. from try_charge_memcg(). The idea is that when
lower tier memory is not used, memcg->toptier and memcg->memory should
have the same value. Otherwise, it can cause confusion about where the
pages of (memcg->memory - memcg->toptier) go. [A sketch of this
suggestion follows this message.]

>  static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
>                                        enum mem_cgroup_events_target target)
>  {
> @@ -3027,6 +3044,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>         if (!ret) {
>                 page->memcg_data = (unsigned long)objcg |
>                         MEMCG_DATA_KMEM;
> +               mem_cgroup_charge_toptier(page_memcg(page),
> +                               page_to_nid(page), 1 << order);
>                 return 0;
>         }
>         obj_cgroup_put(objcg);
> @@ -3050,6 +3069,8 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
>
>         objcg = __folio_objcg(folio);
>         obj_cgroup_uncharge_pages(objcg, nr_pages);
> +       mem_cgroup_charge_toptier(page_memcg(page),
> +                               page_to_nid(page), -nr_pages);
>         folio->memcg_data = 0;
>         obj_cgroup_put(objcg);
>  }
> @@ -3947,13 +3968,10 @@ unsigned long mem_cgroup_memtier_usage(struct mem_cgroup *memcg,
>
>  unsigned long mem_cgroup_toptier_usage(struct mem_cgroup *memcg)
>  {
> -       struct memory_tier *top_tier;
> -
> -       top_tier = list_first_entry(&memory_tiers, struct memory_tier, list);
> -       if (top_tier)
> -               return mem_cgroup_memtier_usage(memcg, top_tier);
> -       else
> +       if (!memcg)
>                 return 0;
> +
> +       return page_counter_read(&memcg->toptier);
>  }
>
>  #endif /* CONFIG_NUMA */
> @@ -5228,11 +5246,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>                 memcg->oom_kill_disable = parent->oom_kill_disable;
>
>                 page_counter_init(&memcg->memory, &parent->memory);
> +               page_counter_init(&memcg->toptier, &parent->toptier);
>                 page_counter_init(&memcg->swap, &parent->swap);
>                 page_counter_init(&memcg->kmem, &parent->kmem);
>                 page_counter_init(&memcg->tcpmem, &parent->tcpmem);
>         } else {
>                 page_counter_init(&memcg->memory, NULL);
> +               page_counter_init(&memcg->toptier, NULL);
>                 page_counter_init(&memcg->swap, NULL);
>                 page_counter_init(&memcg->kmem, NULL);
>                 page_counter_init(&memcg->tcpmem, NULL);
> @@ -5678,6 +5698,8 @@ static int mem_cgroup_move_account(struct page *page,
>         memcg_check_events(to, nid);
>         mem_cgroup_charge_statistics(from, -nr_pages);
>         memcg_check_events(from, nid);
> +       mem_cgroup_charge_toptier(to, nid, nr_pages);
> +       mem_cgroup_charge_toptier(from, nid, -nr_pages);
>         local_irq_enable();
>  out_unlock:
>         folio_unlock(folio);
> @@ -6761,6 +6783,7 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
>
>         local_irq_disable();
>         mem_cgroup_charge_statistics(memcg, nr_pages);
> +       mem_cgroup_charge_toptier(memcg, folio_nid(folio), nr_pages);
>         memcg_check_events(memcg, folio_nid(folio));
>         local_irq_enable();
>  out:
> @@ -6853,6 +6876,7 @@ struct uncharge_gather {
>         unsigned long nr_memory;
>         unsigned long pgpgout;
>         unsigned long nr_kmem;
> +       unsigned long nr_toptier;
>         int nid;
>  };
>
> @@ -6867,6 +6891,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
>
>         if (ug->nr_memory) {
>                 page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
> +               page_counter_uncharge(&ug->memcg->toptier, ug->nr_toptier);
>                 if (do_memsw_account())
>                         page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
>                 if (ug->nr_kmem)
> @@ -6929,12 +6954,18 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
>                         ug->nr_memory += nr_pages;
>                         ug->nr_kmem += nr_pages;
>
> +                       if (node_is_toptier(folio_nid(folio)))
> +                               ug->nr_toptier += nr_pages;
> +
>                         folio->memcg_data = 0;
>                         obj_cgroup_put(objcg);
>                 } else {
>                         /* LRU pages aren't accounted at the root level */
> -                       if (!mem_cgroup_is_root(memcg))
> +                       if (!mem_cgroup_is_root(memcg)) {
>                                 ug->nr_memory += nr_pages;
> +                               if (node_is_toptier(folio_nid(folio)))
> +                                       ug->nr_toptier += nr_pages;
> +                       }
>                         ug->pgpgout++;
>
>                         folio->memcg_data = 0;
> @@ -7011,6 +7042,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
>         /* Force-charge the new page. The old one will be freed soon */
>         if (!mem_cgroup_is_root(memcg)) {
>                 page_counter_charge(&memcg->memory, nr_pages);
> +               mem_cgroup_charge_toptier(memcg, folio_nid(new), nr_pages);
>                 if (do_memsw_account())
>                         page_counter_charge(&memcg->memsw, nr_pages);
>         }
> @@ -7231,8 +7263,10 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
>
>         folio->memcg_data = 0;
>
> -       if (!mem_cgroup_is_root(memcg))
> +       if (!mem_cgroup_is_root(memcg)) {
>                 page_counter_uncharge(&memcg->memory, nr_entries);
> +               mem_cgroup_charge_toptier(memcg, folio_nid(folio), -nr_entries);
> +       }
>
>         if (!cgroup_memory_noswap && memcg != swap_memcg) {
>                 if (!mem_cgroup_is_root(swap_memcg))
> --
> 2.35.1
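[Editor's note: the following user-space model is a minimal sketch of the accounting scheme Wei Xu suggests, not kernel code. The struct, function names, and the assert-based check are invented for illustration; only the overall invariant (toptier == memory when no lower-tier memory is used) comes from the message above.]

```c
/*
 * Model of the suggested scheme: charge "toptier" together with
 * "memory" at charge time, before the backing node is known, and move
 * the charge out of toptier only when a page lands on a lower-tier
 * node.  With no lower-tier pages, toptier always equals memory.
 */
#include <assert.h>
#include <stdbool.h>

struct counters {
	long memory;
	long toptier;
};

/* Models try_charge(): node not yet known, so assume toptier. */
static void charge(struct counters *c, long nr_pages)
{
	c->memory  += nr_pages;
	c->toptier += nr_pages;
}

/* Called once the backing node is known. */
static void fixup_tier(struct counters *c, bool node_is_toptier,
		       long nr_pages)
{
	if (!node_is_toptier)
		c->toptier -= nr_pages;	/* demote the charge */
}

int main(void)
{
	struct counters c = { 0, 0 };

	charge(&c, 512);		/* all pages assumed toptier */
	fixup_tier(&c, false, 128);	/* 128 pages went to a lower tier */

	assert(c.memory == 512);
	assert(c.toptier == 384);	/* memory - toptier = lower-tier use */
	return 0;
}
```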
On Tue, 2022-06-14 at 17:30 -0700, Wei Xu wrote:

Thanks for your comments.

> When we don't know which pages are being charged, we should still
> charge the usage to toptier (assuming that toptier always includes the
> default tier), e.g. from try_charge_memcg().

I delayed the charging of toptier a bit, until we know which page is
being used and the memcg has been assigned to the page. That's when
mem_cgroup_charge_toptier() is invoked. Otherwise, if we charge
toptier first, we will have additional work to deduct the count when
the pages used turn out not to be toptier.

> The idea is that when lower tier memory is not used, memcg->toptier
> and memcg->memory should have the same value. Otherwise, it can cause
> confusion about where the pages of (memcg->memory - memcg->toptier)
> go.

Any difference should be very small, as the charge goes into toptier
very quickly. The values would differ anyway if memcg->memory is read
at a slightly different time.

Tim
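[Editor's note: for contrast, a minimal model of the transient window Tim accepts with the patch's delayed charging. Again not kernel code; the sequence is inferred from the discussion, and the counter struct is the same invented one as above.]

```c
/*
 * With delayed charging, memory is charged first and toptier catches
 * up only when mem_cgroup_charge_toptier() runs, so a concurrent
 * reader can briefly see memory > toptier even with no lower-tier
 * pages in use.
 */
#include <stdio.h>

struct counters {
	long memory;
	long toptier;
};

int main(void)
{
	struct counters c = { 0, 0 };

	c.memory += 64;		/* try_charge() succeeded */
	/* window: a reader here sees memory=64, toptier=0 */
	printf("in window: memory=%ld toptier=%ld\n", c.memory, c.toptier);

	c.toptier += 64;	/* mem_cgroup_charge_toptier() catches up */
	printf("after:     memory=%ld toptier=%ld\n", c.memory, c.toptier);
	return 0;
}
```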