diff mbox series

[v8,6/9] secretmem: add memcg accounting

Message ID 20201110151444.20662-7-rppt@kernel.org (mailing list archive)
State New
Headers show
Series mm: introduce memfd_secret system call to create "secret" memory areas | expand

Commit Message

Mike Rapoport Nov. 10, 2020, 3:14 p.m. UTC
From: Mike Rapoport <rppt@linux.ibm.com>

Account memory consumed by secretmem to memcg. The accounting is updated
when the memory is actually allocated and freed.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 mm/filemap.c   |  2 +-
 mm/secretmem.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 2 deletions(-)

Comments

Andrew Morton Nov. 13, 2020, 1:35 a.m. UTC | #1
On Tue, 10 Nov 2020 17:14:41 +0200 Mike Rapoport <rppt@kernel.org> wrote:

> Account memory consumed by secretmem to memcg. The accounting is updated
> when the memory is actually allocated and freed.

From: Andrew Morton <akpm@linux-foundation.org>
Subject: secretmem-add-memcg-accounting-fix

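Fix the CONFIG_MEMCG=n build: page->memcg_data is only defined when
CONFIG_MEMCG is enabled, so use the page_memcg() accessor instead.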

Cc: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/filemap.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/mm/filemap.c~secretmem-add-memcg-accounting-fix
+++ a/mm/filemap.c
@@ -844,7 +844,7 @@ static noinline int __add_to_page_cache_
 	page->mapping = mapping;
 	page->index = offset;
 
-	if (!huge && !page->memcg_data) {
+	if (!huge && !page_memcg(page)) {
 		error = mem_cgroup_charge(page, current->mm, gfp);
 		if (error)
 			goto error;
Roman Gushchin Nov. 13, 2020, 11:42 p.m. UTC | #2
вт, 10 нояб. 2020 г. в 07:16, Mike Rapoport <rppt@kernel.org>:
>
> From: Mike Rapoport <rppt@linux.ibm.com>
>
> Account memory consumed by secretmem to memcg. The accounting is updated
> when the memory is actually allocated and freed.
>
> Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
> ---
>  mm/filemap.c   |  2 +-
>  mm/secretmem.c | 42 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 249cf489f5df..11387a077373 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -844,7 +844,7 @@ static noinline int __add_to_page_cache_locked(struct page *page,
>         page->mapping = mapping;
>         page->index = offset;
>
> -       if (!huge) {
> +       if (!huge && !page->memcg_data) {
>                 error = mem_cgroup_charge(page, current->mm, gfp);
>                 if (error)
>                         goto error;
> diff --git a/mm/secretmem.c b/mm/secretmem.c
> index 1aa2b7cffe0d..1eb7667016fa 100644
> --- a/mm/secretmem.c
> +++ b/mm/secretmem.c
> @@ -17,6 +17,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/memblock.h>
>  #include <linux/pseudo_fs.h>
> +#include <linux/memcontrol.h>
>  #include <linux/set_memory.h>
>  #include <linux/sched/signal.h>
>
> @@ -49,6 +50,38 @@ struct secretmem_ctx {
>
>  static struct cma *secretmem_cma;
>

Hi Mike!

> +static int secretmem_memcg_charge(struct page *page, gfp_t gfp, int order)
> +{
> +       unsigned long nr_pages = (1 << order);
> +       int i, err;
> +
> +       err = memcg_kmem_charge_page(page, gfp, order);
> +       if (err)
> +               return err;
> +
> +       for (i = 1; i < nr_pages; i++) {
> +               struct page *p = page + i;
> +
> +               p->memcg_data = page->memcg_data;
> +       }

Hm, it looks very strange to me. Why do we need to copy memcg_data?
What about css reference counting?

And what about statistics?

I'm sorry for being late.

Thank you!

> +
> +       return 0;
> +}
> +
> +static void secretmem_memcg_uncharge(struct page *page, int order)
> +{
> +       unsigned long nr_pages = (1 << order);
> +       int i;
> +
> +       for (i = 1; i < nr_pages; i++) {
> +               struct page *p = page + i;
> +
> +               p->memcg_data = 0;
> +       }
> +
> +       memcg_kmem_uncharge_page(page, PMD_PAGE_ORDER);
> +}
> +
>  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
>  {
>         unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
> @@ -61,10 +94,14 @@ static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
>         if (!page)
>                 return -ENOMEM;
>
> -       err = set_direct_map_invalid_noflush(page, nr_pages);
> +       err = secretmem_memcg_charge(page, gfp, PMD_PAGE_ORDER);
>         if (err)
>                 goto err_cma_release;
>
> +       err = set_direct_map_invalid_noflush(page, nr_pages);
> +       if (err)
> +               goto err_memcg_uncharge;
> +
>         addr = (unsigned long)page_address(page);
>         err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
>         if (err)
> @@ -81,6 +118,8 @@ static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
>          * won't fail
>          */
>         set_direct_map_default_noflush(page, nr_pages);
> +err_memcg_uncharge:
> +       secretmem_memcg_uncharge(page, PMD_PAGE_ORDER);
>  err_cma_release:
>         cma_release(secretmem_cma, page, nr_pages);
>         return err;
> @@ -310,6 +349,7 @@ static void secretmem_cleanup_chunk(struct gen_pool *pool,
>         int i;
>
>         set_direct_map_default_noflush(page, nr_pages);
> +       secretmem_memcg_uncharge(page, PMD_PAGE_ORDER);
>
>         for (i = 0; i < nr_pages; i++)
>                 clear_highpage(page + i);
> --
> 2.28.0
>
>
Mike Rapoport Nov. 15, 2020, 9:17 a.m. UTC | #3
On Fri, Nov 13, 2020 at 03:42:25PM -0800, Roman Gushchin wrote:
> вт, 10 нояб. 2020 г. в 07:16, Mike Rapoport <rppt@kernel.org>:
> >
> > From: Mike Rapoport <rppt@linux.ibm.com>
> >
> > Account memory consumed by secretmem to memcg. The accounting is updated
> > when the memory is actually allocated and freed.
> >
> > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
> > ---
> >  mm/filemap.c   |  2 +-
> >  mm/secretmem.c | 42 +++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 42 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 249cf489f5df..11387a077373 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -844,7 +844,7 @@ static noinline int __add_to_page_cache_locked(struct page *page,
> >         page->mapping = mapping;
> >         page->index = offset;
> >
> > -       if (!huge) {
> > +       if (!huge && !page->memcg_data) {
> >                 error = mem_cgroup_charge(page, current->mm, gfp);
> >                 if (error)
> >                         goto error;
> > diff --git a/mm/secretmem.c b/mm/secretmem.c
> > index 1aa2b7cffe0d..1eb7667016fa 100644
> > --- a/mm/secretmem.c
> > +++ b/mm/secretmem.c
> > @@ -17,6 +17,7 @@
> >  #include <linux/syscalls.h>
> >  #include <linux/memblock.h>
> >  #include <linux/pseudo_fs.h>
> > +#include <linux/memcontrol.h>
> >  #include <linux/set_memory.h>
> >  #include <linux/sched/signal.h>
> >
> > @@ -49,6 +50,38 @@ struct secretmem_ctx {
> >
> >  static struct cma *secretmem_cma;
> >
> 
> Hi Mike!
> 
> > +static int secretmem_memcg_charge(struct page *page, gfp_t gfp, int order)
> > +{
> > +       unsigned long nr_pages = (1 << order);
> > +       int i, err;
> > +
> > +       err = memcg_kmem_charge_page(page, gfp, order);
> > +       if (err)
> > +               return err;
> > +
> > +       for (i = 1; i < nr_pages; i++) {
> > +               struct page *p = page + i;
> > +
> > +               p->memcg_data = page->memcg_data;
> > +       }
> 
> Hm, it looks very strange to me. Why do we need to copy memcg_data?
> What about css reference counting?

I need to copy memcg_data to mark a page as being accounted so it won't
be charged again when it is added to page cache.

What happens here is that I allocate a large page and then use it as a
local cache for allocations in secretmem_fault(). I charge the large
page as kmem.

During secretmem_fault() a small sub-page from that large page goes into
the page cache, and there I skip its memcg accounting.

In the end, when the large page is freed, the memcg_data for all its
sub-pages is cleared and I uncharge memcg with the order of the large page.

An alternative would be to uncharge a small page from kmem in
secretmem_fault() and make this page charged in add_to_page_cache(), but
that would complicate the release path as I would need to re-charge the
small page back to kmem at secretmem_freepage() and track all the
participating memcgs till the large page is freed.

> And what about statistics?

Hmm, that probably won't be accurate :-/

> I'm sorry for being late.
> 
> Thank you!
> 
> > +
> > +       return 0;
> > +}
> > +
> > +static void secretmem_memcg_uncharge(struct page *page, int order)
> > +{
> > +       unsigned long nr_pages = (1 << order);
> > +       int i;
> > +
> > +       for (i = 1; i < nr_pages; i++) {
> > +               struct page *p = page + i;
> > +
> > +               p->memcg_data = 0;
> > +       }
> > +
> > +       memcg_kmem_uncharge_page(page, PMD_PAGE_ORDER);
> > +}
> > +
> >  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
> >  {
> >         unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
> > @@ -61,10 +94,14 @@ static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
> >         if (!page)
> >                 return -ENOMEM;
> >
> > -       err = set_direct_map_invalid_noflush(page, nr_pages);
> > +       err = secretmem_memcg_charge(page, gfp, PMD_PAGE_ORDER);
> >         if (err)
> >                 goto err_cma_release;
> >
> > +       err = set_direct_map_invalid_noflush(page, nr_pages);
> > +       if (err)
> > +               goto err_memcg_uncharge;
> > +
> >         addr = (unsigned long)page_address(page);
> >         err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
> >         if (err)
> > @@ -81,6 +118,8 @@ static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
> >          * won't fail
> >          */
> >         set_direct_map_default_noflush(page, nr_pages);
> > +err_memcg_uncharge:
> > +       secretmem_memcg_uncharge(page, PMD_PAGE_ORDER);
> >  err_cma_release:
> >         cma_release(secretmem_cma, page, nr_pages);
> >         return err;
> > @@ -310,6 +349,7 @@ static void secretmem_cleanup_chunk(struct gen_pool *pool,
> >         int i;
> >
> >         set_direct_map_default_noflush(page, nr_pages);
> > +       secretmem_memcg_uncharge(page, PMD_PAGE_ORDER);
> >
> >         for (i = 0; i < nr_pages; i++)
> >                 clear_highpage(page + i);
> > --
> > 2.28.0
> >
> >
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 249cf489f5df..11387a077373 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -844,7 +844,7 @@  static noinline int __add_to_page_cache_locked(struct page *page,
 	page->mapping = mapping;
 	page->index = offset;
 
-	if (!huge) {
+	if (!huge && !page->memcg_data) {
 		error = mem_cgroup_charge(page, current->mm, gfp);
 		if (error)
 			goto error;
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 1aa2b7cffe0d..1eb7667016fa 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -17,6 +17,7 @@ 
 #include <linux/syscalls.h>
 #include <linux/memblock.h>
 #include <linux/pseudo_fs.h>
+#include <linux/memcontrol.h>
 #include <linux/set_memory.h>
 #include <linux/sched/signal.h>
 
@@ -49,6 +50,38 @@  struct secretmem_ctx {
 
 static struct cma *secretmem_cma;
 
+static int secretmem_memcg_charge(struct page *page, gfp_t gfp, int order)
+{
+	unsigned long nr_pages = (1 << order);
+	int i, err;
+
+	err = memcg_kmem_charge_page(page, gfp, order);
+	if (err)
+		return err;
+
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
+
+		p->memcg_data = page->memcg_data;
+	}
+
+	return 0;
+}
+
+static void secretmem_memcg_uncharge(struct page *page, int order)
+{
+	unsigned long nr_pages = (1 << order);
+	int i;
+
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
+
+		p->memcg_data = 0;
+	}
+
+	memcg_kmem_uncharge_page(page, PMD_PAGE_ORDER);
+}
+
 static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 {
 	unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
@@ -61,10 +94,14 @@  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 	if (!page)
 		return -ENOMEM;
 
-	err = set_direct_map_invalid_noflush(page, nr_pages);
+	err = secretmem_memcg_charge(page, gfp, PMD_PAGE_ORDER);
 	if (err)
 		goto err_cma_release;
 
+	err = set_direct_map_invalid_noflush(page, nr_pages);
+	if (err)
+		goto err_memcg_uncharge;
+
 	addr = (unsigned long)page_address(page);
 	err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
 	if (err)
@@ -81,6 +118,8 @@  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 	 * won't fail
 	 */
 	set_direct_map_default_noflush(page, nr_pages);
+err_memcg_uncharge:
+	secretmem_memcg_uncharge(page, PMD_PAGE_ORDER);
 err_cma_release:
 	cma_release(secretmem_cma, page, nr_pages);
 	return err;
@@ -310,6 +349,7 @@  static void secretmem_cleanup_chunk(struct gen_pool *pool,
 	int i;
 
 	set_direct_map_default_noflush(page, nr_pages);
+	secretmem_memcg_uncharge(page, PMD_PAGE_ORDER);
 
 	for (i = 0; i < nr_pages; i++)
 		clear_highpage(page + i);