@@ -194,6 +194,23 @@ bool kmem_valid_obj(void *object);
void kmem_dump_obj(void *object);
#endif
+/*
+ * The recharge is separated into three steps:
+ *   MEMCG_KMEM_PRE_CHARGE  : pre charge to the new memcg
+ *   MEMCG_KMEM_UNCHARGE    : uncharge from the old memcg
+ *   MEMCG_KMEM_POST_CHARGE : post charge to the new memcg
+ * and an error handler:
+ *   MEMCG_KMEM_CHARGE_ERR  : in the pre charge step we may succeed in
+ *                            charging some objects but then fail to
+ *                            charge a new one; in that case the already
+ *                            charged objects must be uncharged again.
+ */
+#define MEMCG_KMEM_PRE_CHARGE 0
+#define MEMCG_KMEM_UNCHARGE 1
+#define MEMCG_KMEM_POST_CHARGE 2
+#define MEMCG_KMEM_CHARGE_ERR 3
+bool krecharge(const void *objp, int step);
+
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
@@ -3798,6 +3798,91 @@ void kfree(const void *objp)
}
EXPORT_SYMBOL(kfree);
+bool krecharge(const void *objp, int step)
+{
+ void *object = (void *)objp;
+ struct obj_cgroup *objcg_old;
+ struct obj_cgroup *objcg_new;
+ struct obj_cgroup **objcgs;
+ struct kmem_cache *s;
+ struct slab *slab;
+ unsigned long flags;
+ unsigned int off;
+
+ WARN_ON(!in_task());
+
+ if (unlikely(ZERO_OR_NULL_PTR(objp)))
+ return true;
+
+ if (!memcg_kmem_enabled())
+ return true;
+
+ local_irq_save(flags);
+ s = virt_to_cache(objp);
+ if (!s)
+ goto out;
+
+ if (!(s->flags & SLAB_ACCOUNT))
+ goto out;
+
+ slab = virt_to_slab(object);
+ if (!slab)
+ goto out;
+
+ objcgs = slab_objcgs(slab);
+ if (!objcgs)
+ goto out;
+
+ off = obj_to_index(s, slab, object);
+ objcg_old = objcgs[off];
+ if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE)
+ goto out;
+
+ /*
+ * The recharge can be separated into three steps,
+ * 1. Pre charge to the new memcg
+ * 2. Uncharge from the old memcg
+ * 3. Charge to the new memcg
+ */
+ switch (step) {
+ case MEMCG_KMEM_PRE_CHARGE:
+ /* Pre charge to the new memcg */
+ objcg_new = get_obj_cgroup_from_current();
+ WARN_ON(!objcg_new);
+ if (obj_cgroup_charge(objcg_new, GFP_KERNEL, obj_full_size(s))) {
+ obj_cgroup_put(objcg_new);
+ local_irq_restore(flags);
+ return false;
+ }
+ break;
+ case MEMCG_KMEM_UNCHARGE:
+ /* Uncharge from the old memcg */
+ obj_cgroup_uncharge(objcg_old, obj_full_size(s));
+ objcgs[off] = NULL;
+ mod_objcg_state(objcg_old, slab_pgdat(slab), cache_vmstat_idx(s),
+ -obj_full_size(s));
+ obj_cgroup_put(objcg_old);
+ break;
+ case MEMCG_KMEM_POST_CHARGE:
+ /* Charge to the new memcg */
+ objcg_new = obj_cgroup_from_current();
+ objcgs[off] = objcg_new;
+ mod_objcg_state(objcg_new, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s));
+ break;
+ case MEMCG_KMEM_CHARGE_ERR:
+ objcg_new = obj_cgroup_from_current();
+ obj_cgroup_uncharge(objcg_new, obj_full_size(s));
+ obj_cgroup_put(objcg_new);
+ break;
+ }
+
+out:
+ local_irq_restore(flags);
+
+ return true;
+}
+EXPORT_SYMBOL(krecharge);
+
/*
* This initializes kmem_cache_node or resizes various caches for all nodes.
*/
@@ -574,6 +574,13 @@ void kfree(const void *block)
}
EXPORT_SYMBOL(kfree);
+/* kmemcg is not supported by SLOB */
+bool krecharge(const void *block, int step)
+{
+ return true;
+}
+EXPORT_SYMBOL(krecharge);
+
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
@@ -4556,6 +4556,131 @@ void kfree(const void *x)
}
EXPORT_SYMBOL(kfree);
+bool krecharge(const void *x, int step)
+{
+ void *object = (void *)x;
+ struct obj_cgroup *objcg_old;
+ struct obj_cgroup *objcg_new;
+ struct obj_cgroup **objcgs;
+ struct kmem_cache *s;
+ struct folio *folio;
+ struct slab *slab;
+ unsigned int off;
+
+ WARN_ON(!in_task());
+
+ if (!memcg_kmem_enabled())
+ return true;
+
+ if (unlikely(ZERO_OR_NULL_PTR(x)))
+ return true;
+
+ folio = virt_to_folio(x);
+ if (unlikely(!folio_test_slab(folio))) {
+ unsigned int order = folio_order(folio);
+ struct page *page;
+
+ switch (step) {
+ case MEMCG_KMEM_PRE_CHARGE:
+ objcg_new = get_obj_cgroup_from_current();
+ WARN_ON(!objcg_new);
+ /* Try to charge the current memcg */
+ if (obj_cgroup_charge_pages(objcg_new, GFP_KERNEL,
+ 1 << order)) {
+ obj_cgroup_put(objcg_new);
+ return false;
+ }
+ break;
+ case MEMCG_KMEM_UNCHARGE:
+ /* Uncharge folio memcg */
+ objcg_old = __folio_objcg(folio);
+ page = folio_page(folio, 0);
+ WARN_ON(!objcg_old);
+ obj_cgroup_uncharge_pages(objcg_old, 1 << order);
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ -(PAGE_SIZE << order));
+ page->memcg_data = 0;
+ obj_cgroup_put(objcg_old);
+ break;
+ case MEMCG_KMEM_POST_CHARGE:
+ /* Attach the new memcg to the folio page */
+ objcg_new = obj_cgroup_from_current();
+ page = folio_page(folio, 0);
+ page->memcg_data = (unsigned long)objcg_new | MEMCG_DATA_KMEM;
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ PAGE_SIZE << order);
+ break;
+ case MEMCG_KMEM_CHARGE_ERR:
+ objcg_new = obj_cgroup_from_current();
+ obj_cgroup_uncharge_pages(objcg_new, 1 << order);
+ obj_cgroup_put(objcg_new);
+ break;
+ }
+ return true;
+ }
+
+ slab = folio_slab(folio);
+ if (!slab)
+ return true;
+
+ s = slab->slab_cache;
+ if (!(s->flags & SLAB_ACCOUNT))
+ return true;
+
+ objcgs = slab_objcgs(slab);
+ if (!objcgs)
+ return true;
+ off = obj_to_index(s, slab, object);
+ objcg_old = objcgs[off];
+ /* In the MEMCG_KMEM_UNCHARGE step, the objcg is set to NULL. */
+ if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE)
+ return true;
+
+ /*
+ * The recharge can be separated into three steps,
+ * 1. Pre charge to the new memcg
+ * 2. Uncharge from the old memcg
+ * 3. Charge to the new memcg
+ */
+ switch (step) {
+ case MEMCG_KMEM_PRE_CHARGE:
+ /*
+ * We must pre charge the new memcg before uncharging from the old one,
+ * to make sure the charge to the new memcg always succeeds once the
+ * object has been uncharged from the old memcg.
+ */
+ objcg_new = get_obj_cgroup_from_current();
+ WARN_ON(!objcg_new);
+ if (obj_cgroup_charge(objcg_new, GFP_KERNEL, obj_full_size(s))) {
+ obj_cgroup_put(objcg_new);
+ return false;
+ }
+ break;
+ case MEMCG_KMEM_UNCHARGE:
+ /* Uncharge from the old memcg */
+ obj_cgroup_uncharge(objcg_old, obj_full_size(s));
+ objcgs[off] = NULL;
+ mod_objcg_state(objcg_old, slab_pgdat(slab), cache_vmstat_idx(s),
+ -obj_full_size(s));
+ obj_cgroup_put(objcg_old);
+ break;
+ case MEMCG_KMEM_POST_CHARGE:
+ /* Charge to the new memcg */
+ objcg_new = obj_cgroup_from_current();
+ objcgs[off] = objcg_new;
+ mod_objcg_state(objcg_new, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s));
+ break;
+ case MEMCG_KMEM_CHARGE_ERR:
+ objcg_new = obj_cgroup_from_current();
+ obj_cgroup_uncharge(objcg_new, obj_full_size(s));
+ obj_cgroup_put(objcg_new);
+ break;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(krecharge);
+
#define SHRINK_PROMOTE_MAX 32
/*
This patch introduces a helper to recharge the corresponding pages of a
given kmalloc'ed address. The recharge is divided into three steps:

- pre charge to the new memcg
  To make sure that once we uncharge from the old memcg we can always
  charge to the new memcg successfully. If we can't pre charge to the
  new memcg, we won't allow it to be uncharged from the old memcg.

- uncharge from the old memcg
  After pre charging to the new memcg, we can uncharge from the old memcg.

- post charge to the new memcg
  Modify the counters of the new memcg.

Sometimes we may want to recharge many kmalloc'ed addresses to the same
memcg. In that case we should pre charge all these addresses first, then
do the uncharge and finally the post charge. It may happen that, after
successfully pre charging some addresses, we fail to pre charge a new
one; we then have to cancel the pre charges already made, so a charge-err
step is introduced for this purpose.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/slab.h |  17 ++++
 mm/slab.c            |  85 +++++++++++++++++++++++++++++
 mm/slob.c            |   7 +++
 mm/slub.c            | 125 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+)
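
For illustration only (not part of the patch): a minimal sketch of how a
caller might drive the three-step protocol over a batch of objects. The
helper recharge_objects() and its objs/nr parameters are hypothetical;
only krecharge() and the MEMCG_KMEM_* step constants come from this patch.

/*
 * Illustrative sketch only: recharge_objects(), objs and nr are made up
 * for this example; only krecharge() and the MEMCG_KMEM_* constants are
 * provided by the patch.
 */
static bool recharge_objects(void **objs, int nr)
{
        int i, j;

        /* Step 1: pre charge every object to the current task's memcg. */
        for (i = 0; i < nr; i++) {
                if (!krecharge(objs[i], MEMCG_KMEM_PRE_CHARGE)) {
                        /* Roll back the pre charges that already succeeded. */
                        for (j = 0; j < i; j++)
                                krecharge(objs[j], MEMCG_KMEM_CHARGE_ERR);
                        return false;
                }
        }

        /* Step 2: uncharge every object from its old memcg. */
        for (i = 0; i < nr; i++)
                krecharge(objs[i], MEMCG_KMEM_UNCHARGE);

        /* Step 3: account every object to the new (current) memcg. */
        for (i = 0; i < nr; i++)
                krecharge(objs[i], MEMCG_KMEM_POST_CHARGE);

        return true;
}

Since krecharge() warns on !in_task(), the sketch assumes the caller runs
in process context.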