diff mbox series

[v9,6/9] secretmem: add memcg accounting

Message ID 20201117162932.13649-7-rppt@kernel.org (mailing list archive)
State New, archived
Headers show
Series mm: introduce memfd_secret system call to create "secret" memory areas | expand

Commit Message

Mike Rapoport Nov. 17, 2020, 4:29 p.m. UTC
From: Mike Rapoport <rppt@linux.ibm.com>

Account memory consumed by secretmem to memcg. The accounting is updated
when the memory is actually allocated and freed.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 mm/filemap.c   |  3 ++-
 mm/secretmem.c | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

Comments

Roman Gushchin Nov. 17, 2020, 7:33 p.m. UTC | #1
On Tue, Nov 17, 2020 at 06:29:29PM +0200, Mike Rapoport wrote:
> From: Mike Rapoport <rppt@linux.ibm.com>
> 
> Account memory consumed by secretmem to memcg. The accounting is updated
> when the memory is actually allocated and freed.
> 
> Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>

Hi Mike!

I like this version so much more, thank you for fixing it.
Please, feel free to add
Acked-by: Roman Gushchin <guro@fb.com> .

Thanks!

> ---
>  mm/filemap.c   |  3 ++-
>  mm/secretmem.c | 36 +++++++++++++++++++++++++++++++++++-
>  2 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 249cf489f5df..cf7f1dc9f4b8 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -42,6 +42,7 @@
>  #include <linux/psi.h>
>  #include <linux/ramfs.h>
>  #include <linux/page_idle.h>
> +#include <linux/secretmem.h>
>  #include "internal.h"
>  
>  #define CREATE_TRACE_POINTS
> @@ -844,7 +845,7 @@ static noinline int __add_to_page_cache_locked(struct page *page,
>  	page->mapping = mapping;
>  	page->index = offset;
>  
> -	if (!huge) {
> +	if (!huge && !page_is_secretmem(page)) {
>  		error = mem_cgroup_charge(page, current->mm, gfp);
>  		if (error)
>  			goto error;
> diff --git a/mm/secretmem.c b/mm/secretmem.c
> index d4c44fc568a4..abf6ecdf70cb 100644
> --- a/mm/secretmem.c
> +++ b/mm/secretmem.c
> @@ -18,6 +18,7 @@
>  #include <linux/memblock.h>
>  #include <linux/pseudo_fs.h>
>  #include <linux/secretmem.h>
> +#include <linux/memcontrol.h>
>  #include <linux/set_memory.h>
>  #include <linux/sched/signal.h>
>  
> @@ -50,6 +51,32 @@ struct secretmem_ctx {
>  
>  static struct cma *secretmem_cma;
>  
> +static int secretmem_account_pages(struct page *page, gfp_t gfp, int order)
> +{
> +	int err;
> +
> +	err = memcg_kmem_charge_page(page, gfp, order);
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * seceremem caches are unreclaimable kernel allocations, so treat
           ^^^^^^^^^
	   secretmem?

> +	 * them as unreclaimable slab memory for VM statistics purposes
> +	 */
> +	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
> +			    PAGE_SIZE << order);
> +
> +	return 0;
> +}
> +
> +static void secretmem_unaccount_pages(struct page *page, int order)
> +{
> +
> +	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
> +			    -PAGE_SIZE << order);
> +	memcg_kmem_uncharge_page(page, order);
> +}
> +
>  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
>  {
>  	unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
> @@ -62,10 +89,14 @@ static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
>  	if (!page)
>  		return -ENOMEM;
>  
> -	err = set_direct_map_invalid_noflush(page, nr_pages);
> +	err = secretmem_account_pages(page, gfp, PMD_PAGE_ORDER);
>  	if (err)
>  		goto err_cma_release;
>  
> +	err = set_direct_map_invalid_noflush(page, nr_pages);
> +	if (err)
> +		goto err_memcg_uncharge;
> +
>  	addr = (unsigned long)page_address(page);
>  	err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
>  	if (err)
> @@ -82,6 +113,8 @@ static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
>  	 * won't fail
>  	 */
>  	set_direct_map_default_noflush(page, nr_pages);
> +err_memcg_uncharge:
> +	secretmem_unaccount_pages(page, PMD_PAGE_ORDER);
>  err_cma_release:
>  	cma_release(secretmem_cma, page, nr_pages);
>  	return err;
> @@ -312,6 +345,7 @@ static void secretmem_cleanup_chunk(struct gen_pool *pool,
>  	int i;
>  
>  	set_direct_map_default_noflush(page, nr_pages);
> +	secretmem_unaccount_pages(page, PMD_PAGE_ORDER);
>  
>  	for (i = 0; i < nr_pages; i++)
>  		clear_highpage(page + i);
> -- 
> 2.28.0
>
Shakeel Butt Nov. 17, 2020, 8:02 p.m. UTC | #2
On Tue, Nov 17, 2020 at 11:49 AM Roman Gushchin <guro@fb.com> wrote:
>
> On Tue, Nov 17, 2020 at 06:29:29PM +0200, Mike Rapoport wrote:
> > From: Mike Rapoport <rppt@linux.ibm.com>
> >
> > Account memory consumed by secretmem to memcg. The accounting is updated
> > when the memory is actually allocated and freed.
> >
> > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
[snip]
> >
> > +static int secretmem_account_pages(struct page *page, gfp_t gfp, int order)
> > +{
> > +     int err;
> > +
> > +     err = memcg_kmem_charge_page(page, gfp, order);

I haven't looked at the whole series but it seems like these pages
will be mapped into the userspace, so this patch has dependency on
Roman's "mm: allow mapping
accounted kernel pages to userspace" patch series.
Mike Rapoport Nov. 18, 2020, 6:55 a.m. UTC | #3
On Tue, Nov 17, 2020 at 12:02:01PM -0800, Shakeel Butt wrote:
> On Tue, Nov 17, 2020 at 11:49 AM Roman Gushchin <guro@fb.com> wrote:
> >
> > On Tue, Nov 17, 2020 at 06:29:29PM +0200, Mike Rapoport wrote:
> > > From: Mike Rapoport <rppt@linux.ibm.com>
> > >
> > > Account memory consumed by secretmem to memcg. The accounting is updated
> > > when the memory is actually allocated and freed.
> > >
> > > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
> [snip]
> > >
> > > +static int secretmem_account_pages(struct page *page, gfp_t gfp, int order)
> > > +{
> > > +     int err;
> > > +
> > > +     err = memcg_kmem_charge_page(page, gfp, order);
> 
> I haven't looked at the whole series but it seems like these pages
> will be mapped into the userspace, so this patch has dependency on
> Roman's "mm: allow mapping
> accounted kernel pages to userspace" patch series.

Yes, that's why I rebased the patches on top of mmotm.
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 249cf489f5df..cf7f1dc9f4b8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -42,6 +42,7 @@ 
 #include <linux/psi.h>
 #include <linux/ramfs.h>
 #include <linux/page_idle.h>
+#include <linux/secretmem.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -844,7 +845,7 @@  static noinline int __add_to_page_cache_locked(struct page *page,
 	page->mapping = mapping;
 	page->index = offset;
 
-	if (!huge) {
+	if (!huge && !page_is_secretmem(page)) {
 		error = mem_cgroup_charge(page, current->mm, gfp);
 		if (error)
 			goto error;
diff --git a/mm/secretmem.c b/mm/secretmem.c
index d4c44fc568a4..abf6ecdf70cb 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -18,6 +18,7 @@ 
 #include <linux/memblock.h>
 #include <linux/pseudo_fs.h>
 #include <linux/secretmem.h>
+#include <linux/memcontrol.h>
 #include <linux/set_memory.h>
 #include <linux/sched/signal.h>
 
@@ -50,6 +51,32 @@  struct secretmem_ctx {
 
 static struct cma *secretmem_cma;
 
+static int secretmem_account_pages(struct page *page, gfp_t gfp, int order)
+{
+	int err;
+
+	err = memcg_kmem_charge_page(page, gfp, order);
+	if (err)
+		return err;
+
+	/*
+	 * seceremem caches are unreclaimable kernel allocations, so treat
+	 * them as unreclaimable slab memory for VM statistics purposes
+	 */
+	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+			    PAGE_SIZE << order);
+
+	return 0;
+}
+
+static void secretmem_unaccount_pages(struct page *page, int order)
+{
+
+	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+			    -PAGE_SIZE << order);
+	memcg_kmem_uncharge_page(page, order);
+}
+
 static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 {
 	unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
@@ -62,10 +89,14 @@  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 	if (!page)
 		return -ENOMEM;
 
-	err = set_direct_map_invalid_noflush(page, nr_pages);
+	err = secretmem_account_pages(page, gfp, PMD_PAGE_ORDER);
 	if (err)
 		goto err_cma_release;
 
+	err = set_direct_map_invalid_noflush(page, nr_pages);
+	if (err)
+		goto err_memcg_uncharge;
+
 	addr = (unsigned long)page_address(page);
 	err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
 	if (err)
@@ -82,6 +113,8 @@  static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
 	 * won't fail
 	 */
 	set_direct_map_default_noflush(page, nr_pages);
+err_memcg_uncharge:
+	secretmem_unaccount_pages(page, PMD_PAGE_ORDER);
 err_cma_release:
 	cma_release(secretmem_cma, page, nr_pages);
 	return err;
@@ -312,6 +345,7 @@  static void secretmem_cleanup_chunk(struct gen_pool *pool,
 	int i;
 
 	set_direct_map_default_noflush(page, nr_pages);
+	secretmem_unaccount_pages(page, PMD_PAGE_ORDER);
 
 	for (i = 0; i < nr_pages; i++)
 		clear_highpage(page + i);