diff mbox series

[4/4] mm: zswap: add basic meminfo and vmstat coverage

Message ID 20210819195533.211756-4-hannes@cmpxchg.org (mailing list archive)
State New
Headers show
Series [1/4] mm: Kconfig: move swap and slab config options to the MM section | expand

Commit Message

Johannes Weiner Aug. 19, 2021, 7:55 p.m. UTC
Currently it requires poking at debugfs to figure out the size of the
zswap cache size on a host. There are no counters for reads and writes
against the cache. This makes it difficult to understand behavior on
production systems.

Print zswap memory consumption in /proc/meminfo, count zswapouts and
zswapins in /proc/vmstat.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 fs/proc/meminfo.c             |  4 ++++
 include/linux/swap.h          |  4 ++++
 include/linux/vm_event_item.h |  4 ++++
 mm/vmstat.c                   |  4 ++++
 mm/zswap.c                    | 11 +++++------
 5 files changed, 21 insertions(+), 6 deletions(-)

Comments

Vlastimil Babka Aug. 24, 2021, 12:05 p.m. UTC | #1
+CC zswap maintainers

On 8/19/21 21:55, Johannes Weiner wrote:
> Currently it requires poking at debugfs to figure out the size of the
> zswap cache size on a host. There are no counters for reads and writes
> against the cache. This makes it difficult to understand behavior on
> production systems.
> 
> Print zswap memory consumption in /proc/meminfo, count zswapouts and
> zswapins in /proc/vmstat.
> 
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>  fs/proc/meminfo.c             |  4 ++++
>  include/linux/swap.h          |  4 ++++
>  include/linux/vm_event_item.h |  4 ++++
>  mm/vmstat.c                   |  4 ++++
>  mm/zswap.c                    | 11 +++++------
>  5 files changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 6fa761c9cc78..2dc474940691 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>  
>  	show_val_kb(m, "SwapTotal:      ", i.totalswap);
>  	show_val_kb(m, "SwapFree:       ", i.freeswap);
> +#ifdef CONFIG_ZSWAP
> +	seq_printf(m,  "Zswap:          %8lu kB\n",
> +		   (unsigned long)(zswap_pool_total_size >> 10));
> +#endif
>  	show_val_kb(m, "Dirty:          ",
>  		    global_node_page_state(NR_FILE_DIRTY));
>  	show_val_kb(m, "Writeback:      ",
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 144727041e78..3b23c88b6a8d 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -696,6 +696,10 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
>  }
>  #endif
>  
> +#ifdef CONFIG_ZSWAP
> +extern u64 zswap_pool_total_size;
> +#endif
> +
>  #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
>  extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask);
>  #else
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index ae0dd1948c2b..9dbebea09c69 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -125,6 +125,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  		SWAP_RA,
>  		SWAP_RA_HIT,
>  #endif
> +#ifdef CONFIG_ZSWAP
> +		ZSWPIN,
> +		ZSWPOUT,
> +#endif
>  #ifdef CONFIG_X86
>  		DIRECT_MAP_LEVEL2_SPLIT,
>  		DIRECT_MAP_LEVEL3_SPLIT,
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index cccee36b289c..31aada15c571 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1369,6 +1369,10 @@ const char * const vmstat_text[] = {
>  	"swap_ra",
>  	"swap_ra_hit",
>  #endif
> +#ifdef CONFIG_ZSWAP
> +	"zswpin",
> +	"zswpout",
> +#endif
>  #ifdef CONFIG_X86
>  	"direct_map_level2_splits",
>  	"direct_map_level3_splits",
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 20763267a219..f93a7c715f76 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -40,7 +40,7 @@
>  * statistics
>  **********************************/
>  /* Total bytes used by the compressed storage */
> -static u64 zswap_pool_total_size;
> +u64 zswap_pool_total_size;
>  /* The number of compressed pages currently stored in zswap */
>  static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
>  /* The number of same-value filled pages currently stored in zswap */
> @@ -1231,6 +1231,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>  	/* update stats */
>  	atomic_inc(&zswap_stored_pages);
>  	zswap_update_total_size();
> +	count_vm_event(ZSWAPOUT);
>  
>  	return 0;
>  
> @@ -1273,11 +1274,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  		zswap_fill_page(dst, entry->value);
>  		kunmap_atomic(dst);
>  		ret = 0;
> -		goto freeentry;
> +		goto stats;
>  	}
>  
>  	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
> -
>  		tmp = kmalloc(entry->length, GFP_ATOMIC);
>  		if (!tmp) {
>  			ret = -ENOMEM;
> @@ -1292,10 +1292,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  		src += sizeof(struct zswap_header);
>  
>  	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
> -
>  		memcpy(tmp, src, entry->length);
>  		src = tmp;
> -
>  		zpool_unmap_handle(entry->pool->zpool, entry->handle);
>  	}
>  
> @@ -1314,7 +1312,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  		kfree(tmp);
>  
>  	BUG_ON(ret);
> -
> +stats:
> +	count_vm_event(ZSWAPIN);
>  freeentry:
>  	spin_lock(&tree->lock);
>  	zswap_entry_put(tree, entry);
>
kernel test robot Aug. 30, 2021, 6:04 p.m. UTC | #2
Hi Johannes,

I love your patch! Yet something to improve:

[auto build test ERROR on linux/master]
[cannot apply to hnaz-linux-mm/master block/for-next linus/master v5.14 next-20210830]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Johannes-Weiner/mm-Kconfig-move-swap-and-slab-config-options-to-the-MM-section/20210820-035613
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830
config: x86_64-rhel-8.3 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/216a0ba919927dccd2dd26d7af1e395f4360002f
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Johannes-Weiner/mm-Kconfig-move-swap-and-slab-config-options-to-the-MM-section/20210820-035613
        git checkout 216a0ba919927dccd2dd26d7af1e395f4360002f
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/zswap.c: In function 'zswap_frontswap_store':
>> mm/zswap.c:1234:17: error: 'ZSWAPOUT' undeclared (first use in this function); did you mean 'ZSWPOUT'?
    1234 |  count_vm_event(ZSWAPOUT);
         |                 ^~~~~~~~
         |                 ZSWPOUT
   mm/zswap.c:1234:17: note: each undeclared identifier is reported only once for each function it appears in
   mm/zswap.c: In function 'zswap_frontswap_load':
>> mm/zswap.c:1316:17: error: 'ZSWAPIN' undeclared (first use in this function); did you mean 'ZSWPIN'?
    1316 |  count_vm_event(ZSWAPIN);
         |                 ^~~~~~~
         |                 ZSWPIN


vim +1234 mm/zswap.c

  1080	
  1081	/*********************************
  1082	* frontswap hooks
  1083	**********************************/
  1084	/* attempts to compress and store an single page */
  1085	static int zswap_frontswap_store(unsigned type, pgoff_t offset,
  1086					struct page *page)
  1087	{
  1088		struct zswap_tree *tree = zswap_trees[type];
  1089		struct zswap_entry *entry, *dupentry;
  1090		struct scatterlist input, output;
  1091		struct crypto_acomp_ctx *acomp_ctx;
  1092		int ret;
  1093		unsigned int hlen, dlen = PAGE_SIZE;
  1094		unsigned long handle, value;
  1095		char *buf;
  1096		u8 *src, *dst;
  1097		struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
  1098		gfp_t gfp;
  1099	
  1100		/* THP isn't supported */
  1101		if (PageTransHuge(page)) {
  1102			ret = -EINVAL;
  1103			goto reject;
  1104		}
  1105	
  1106		if (!zswap_enabled || !tree) {
  1107			ret = -ENODEV;
  1108			goto reject;
  1109		}
  1110	
  1111		/* reclaim space if needed */
  1112		if (zswap_is_full()) {
  1113			struct zswap_pool *pool;
  1114	
  1115			zswap_pool_limit_hit++;
  1116			zswap_pool_reached_full = true;
  1117			pool = zswap_pool_last_get();
  1118			if (pool)
  1119				queue_work(shrink_wq, &pool->shrink_work);
  1120			ret = -ENOMEM;
  1121			goto reject;
  1122		}
  1123	
  1124		if (zswap_pool_reached_full) {
  1125		       if (!zswap_can_accept()) {
  1126				ret = -ENOMEM;
  1127				goto reject;
  1128			} else
  1129				zswap_pool_reached_full = false;
  1130		}
  1131	
  1132		/* allocate entry */
  1133		entry = zswap_entry_cache_alloc(GFP_KERNEL);
  1134		if (!entry) {
  1135			zswap_reject_kmemcache_fail++;
  1136			ret = -ENOMEM;
  1137			goto reject;
  1138		}
  1139	
  1140		if (zswap_same_filled_pages_enabled) {
  1141			src = kmap_atomic(page);
  1142			if (zswap_is_page_same_filled(src, &value)) {
  1143				kunmap_atomic(src);
  1144				entry->offset = offset;
  1145				entry->length = 0;
  1146				entry->value = value;
  1147				atomic_inc(&zswap_same_filled_pages);
  1148				goto insert_entry;
  1149			}
  1150			kunmap_atomic(src);
  1151		}
  1152	
  1153		/* if entry is successfully added, it keeps the reference */
  1154		entry->pool = zswap_pool_current_get();
  1155		if (!entry->pool) {
  1156			ret = -EINVAL;
  1157			goto freepage;
  1158		}
  1159	
  1160		/* compress */
  1161		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
  1162	
  1163		mutex_lock(acomp_ctx->mutex);
  1164	
  1165		dst = acomp_ctx->dstmem;
  1166		sg_init_table(&input, 1);
  1167		sg_set_page(&input, page, PAGE_SIZE, 0);
  1168	
  1169		/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
  1170		sg_init_one(&output, dst, PAGE_SIZE * 2);
  1171		acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
  1172		/*
  1173		 * it maybe looks a little bit silly that we send an asynchronous request,
  1174		 * then wait for its completion synchronously. This makes the process look
  1175		 * synchronous in fact.
  1176		 * Theoretically, acomp supports users send multiple acomp requests in one
  1177		 * acomp instance, then get those requests done simultaneously. but in this
  1178		 * case, frontswap actually does store and load page by page, there is no
  1179		 * existing method to send the second page before the first page is done
  1180		 * in one thread doing frontswap.
  1181		 * but in different threads running on different cpu, we have different
  1182		 * acomp instance, so multiple threads can do (de)compression in parallel.
  1183		 */
  1184		ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
  1185		dlen = acomp_ctx->req->dlen;
  1186	
  1187		if (ret) {
  1188			ret = -EINVAL;
  1189			goto put_dstmem;
  1190		}
  1191	
  1192		/* store */
  1193		hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
  1194		gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
  1195		if (zpool_malloc_support_movable(entry->pool->zpool))
  1196			gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
  1197		ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
  1198		if (ret == -ENOSPC) {
  1199			zswap_reject_compress_poor++;
  1200			goto put_dstmem;
  1201		}
  1202		if (ret) {
  1203			zswap_reject_alloc_fail++;
  1204			goto put_dstmem;
  1205		}
  1206		buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
  1207		memcpy(buf, &zhdr, hlen);
  1208		memcpy(buf + hlen, dst, dlen);
  1209		zpool_unmap_handle(entry->pool->zpool, handle);
  1210		mutex_unlock(acomp_ctx->mutex);
  1211	
  1212		/* populate entry */
  1213		entry->offset = offset;
  1214		entry->handle = handle;
  1215		entry->length = dlen;
  1216	
  1217	insert_entry:
  1218		/* map */
  1219		spin_lock(&tree->lock);
  1220		do {
  1221			ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
  1222			if (ret == -EEXIST) {
  1223				zswap_duplicate_entry++;
  1224				/* remove from rbtree */
  1225				zswap_rb_erase(&tree->rbroot, dupentry);
  1226				zswap_entry_put(tree, dupentry);
  1227			}
  1228		} while (ret == -EEXIST);
  1229		spin_unlock(&tree->lock);
  1230	
  1231		/* update stats */
  1232		atomic_inc(&zswap_stored_pages);
  1233		zswap_update_total_size();
> 1234		count_vm_event(ZSWAPOUT);
  1235	
  1236		return 0;
  1237	
  1238	put_dstmem:
  1239		mutex_unlock(acomp_ctx->mutex);
  1240		zswap_pool_put(entry->pool);
  1241	freepage:
  1242		zswap_entry_cache_free(entry);
  1243	reject:
  1244		return ret;
  1245	}
  1246	
  1247	/*
  1248	 * returns 0 if the page was successfully decompressed
  1249	 * return -1 on entry not found or error
  1250	*/
  1251	static int zswap_frontswap_load(unsigned type, pgoff_t offset,
  1252					struct page *page)
  1253	{
  1254		struct zswap_tree *tree = zswap_trees[type];
  1255		struct zswap_entry *entry;
  1256		struct scatterlist input, output;
  1257		struct crypto_acomp_ctx *acomp_ctx;
  1258		u8 *src, *dst, *tmp;
  1259		unsigned int dlen;
  1260		int ret;
  1261	
  1262		/* find */
  1263		spin_lock(&tree->lock);
  1264		entry = zswap_entry_find_get(&tree->rbroot, offset);
  1265		if (!entry) {
  1266			/* entry was written back */
  1267			spin_unlock(&tree->lock);
  1268			return -1;
  1269		}
  1270		spin_unlock(&tree->lock);
  1271	
  1272		if (!entry->length) {
  1273			dst = kmap_atomic(page);
  1274			zswap_fill_page(dst, entry->value);
  1275			kunmap_atomic(dst);
  1276			ret = 0;
  1277			goto stats;
  1278		}
  1279	
  1280		if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
  1281			tmp = kmalloc(entry->length, GFP_ATOMIC);
  1282			if (!tmp) {
  1283				ret = -ENOMEM;
  1284				goto freeentry;
  1285			}
  1286		}
  1287	
  1288		/* decompress */
  1289		dlen = PAGE_SIZE;
  1290		src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
  1291		if (zpool_evictable(entry->pool->zpool))
  1292			src += sizeof(struct zswap_header);
  1293	
  1294		if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
  1295			memcpy(tmp, src, entry->length);
  1296			src = tmp;
  1297			zpool_unmap_handle(entry->pool->zpool, entry->handle);
  1298		}
  1299	
  1300		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
  1301		mutex_lock(acomp_ctx->mutex);
  1302		sg_init_one(&input, src, entry->length);
  1303		sg_init_table(&output, 1);
  1304		sg_set_page(&output, page, PAGE_SIZE, 0);
  1305		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
  1306		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
  1307		mutex_unlock(acomp_ctx->mutex);
  1308	
  1309		if (zpool_can_sleep_mapped(entry->pool->zpool))
  1310			zpool_unmap_handle(entry->pool->zpool, entry->handle);
  1311		else
  1312			kfree(tmp);
  1313	
  1314		BUG_ON(ret);
  1315	stats:
> 1316		count_vm_event(ZSWAPIN);
  1317	freeentry:
  1318		spin_lock(&tree->lock);
  1319		zswap_entry_put(tree, entry);
  1320		spin_unlock(&tree->lock);
  1321	
  1322		return ret;
  1323	}
  1324	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Minchan Kim Aug. 30, 2021, 6:49 p.m. UTC | #3
Hi Johannes,

On Thu, Aug 19, 2021 at 03:55:33PM -0400, Johannes Weiner wrote:
> Currently it requires poking at debugfs to figure out the size of the
> zswap cache size on a host. There are no counters for reads and writes
> against the cache. This makes it difficult to understand behavior on
> production systems.
> 
> Print zswap memory consumption in /proc/meminfo, count zswapouts and
> zswapins in /proc/vmstat.
> 
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>  fs/proc/meminfo.c             |  4 ++++
>  include/linux/swap.h          |  4 ++++
>  include/linux/vm_event_item.h |  4 ++++
>  mm/vmstat.c                   |  4 ++++
>  mm/zswap.c                    | 11 +++++------
>  5 files changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 6fa761c9cc78..2dc474940691 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>  
>  	show_val_kb(m, "SwapTotal:      ", i.totalswap);
>  	show_val_kb(m, "SwapFree:       ", i.freeswap);
> +#ifdef CONFIG_ZSWAP
> +	seq_printf(m,  "Zswap:          %8lu kB\n",
> +		   (unsigned long)(zswap_pool_total_size >> 10));

Since we have zram as well as zswap, it would be great if
we can abstract both at once without introducing another
"Zram: " stuff in meminfo. A note: zram can support fs based on
on zram blk device as well as swap. Thus, term would be better
to say "compressed" rather than "swap".

How about this?

"Compressed: xx kB"

unsigined long total_compressed_memory(void) {
    return zswap_compressed_mem() + zram_comporessed_mem();
}

> +#endif
>  	show_val_kb(m, "Dirty:          ",
>  		    global_node_page_state(NR_FILE_DIRTY));
>  	show_val_kb(m, "Writeback:      ",
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 144727041e78..3b23c88b6a8d 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -696,6 +696,10 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
>  }
>  #endif
>  
> +#ifdef CONFIG_ZSWAP
> +extern u64 zswap_pool_total_size;
> +#endif
> +
>  #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
>  extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask);
>  #else
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index ae0dd1948c2b..9dbebea09c69 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -125,6 +125,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  		SWAP_RA,
>  		SWAP_RA_HIT,
>  #endif
> +#ifdef CONFIG_ZSWAP
> +		ZSWPIN,
> +		ZSWPOUT,

INMEM_SWP[IN|OUT] to represent both zram and zswap ?
Feel free to suggest better word.

> +#endif
>  #ifdef CONFIG_X86
>  		DIRECT_MAP_LEVEL2_SPLIT,
>  		DIRECT_MAP_LEVEL3_SPLIT,
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index cccee36b289c..31aada15c571 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1369,6 +1369,10 @@ const char * const vmstat_text[] = {
>  	"swap_ra",
>  	"swap_ra_hit",
>  #endif
> +#ifdef CONFIG_ZSWAP
> +	"zswpin",
> +	"zswpout",
> +#endif
>  #ifdef CONFIG_X86
>  	"direct_map_level2_splits",
>  	"direct_map_level3_splits",
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 20763267a219..f93a7c715f76 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -40,7 +40,7 @@
>  * statistics
>  **********************************/
>  /* Total bytes used by the compressed storage */
> -static u64 zswap_pool_total_size;
> +u64 zswap_pool_total_size;
>  /* The number of compressed pages currently stored in zswap */
>  static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
>  /* The number of same-value filled pages currently stored in zswap */
> @@ -1231,6 +1231,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>  	/* update stats */
>  	atomic_inc(&zswap_stored_pages);
>  	zswap_update_total_size();
> +	count_vm_event(ZSWAPOUT);
>  
>  	return 0;
>  
> @@ -1273,11 +1274,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  		zswap_fill_page(dst, entry->value);
>  		kunmap_atomic(dst);
>  		ret = 0;
> -		goto freeentry;
> +		goto stats;
>  	}
>  
>  	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
> -
>  		tmp = kmalloc(entry->length, GFP_ATOMIC);
>  		if (!tmp) {
>  			ret = -ENOMEM;
> @@ -1292,10 +1292,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  		src += sizeof(struct zswap_header);
>  
>  	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
> -
>  		memcpy(tmp, src, entry->length);
>  		src = tmp;
> -
>  		zpool_unmap_handle(entry->pool->zpool, entry->handle);
>  	}
>  
> @@ -1314,7 +1312,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  		kfree(tmp);
>  
>  	BUG_ON(ret);
> -
> +stats:
> +	count_vm_event(ZSWAPIN);
>  freeentry:
>  	spin_lock(&tree->lock);
>  	zswap_entry_put(tree, entry);
> -- 
> 2.32.0
> 
>
Johannes Weiner Nov. 2, 2021, 3:06 p.m. UTC | #4
Hi Minchan,

Sorry about the delay, I'm just now getting back to these patches.

On Mon, Aug 30, 2021 at 11:49:59AM -0700, Minchan Kim wrote:
> Hi Johannes,
> 
> On Thu, Aug 19, 2021 at 03:55:33PM -0400, Johannes Weiner wrote:
> > Currently it requires poking at debugfs to figure out the size of the
> > zswap cache size on a host. There are no counters for reads and writes
> > against the cache. This makes it difficult to understand behavior on
> > production systems.
> > 
> > Print zswap memory consumption in /proc/meminfo, count zswapouts and
> > zswapins in /proc/vmstat.
> > 
> > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> > ---
> >  fs/proc/meminfo.c             |  4 ++++
> >  include/linux/swap.h          |  4 ++++
> >  include/linux/vm_event_item.h |  4 ++++
> >  mm/vmstat.c                   |  4 ++++
> >  mm/zswap.c                    | 11 +++++------
> >  5 files changed, 21 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> > index 6fa761c9cc78..2dc474940691 100644
> > --- a/fs/proc/meminfo.c
> > +++ b/fs/proc/meminfo.c
> > @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> >  
> >  	show_val_kb(m, "SwapTotal:      ", i.totalswap);
> >  	show_val_kb(m, "SwapFree:       ", i.freeswap);
> > +#ifdef CONFIG_ZSWAP
> > +	seq_printf(m,  "Zswap:          %8lu kB\n",
> > +		   (unsigned long)(zswap_pool_total_size >> 10));
> 
> Since we have zram as well as zswap, it would be great if
> we can abstract both at once without introducing another
> "Zram: " stuff in meminfo. A note: zram can support fs based on
> on zram blk device as well as swap. Thus, term would be better
> to say "compressed" rather than "swap".
> 
> How about this?
> 
> "Compressed: xx kB"

Wouldn't it make more sense to keep separate counters? Zswap and zram
are quite different from each other.

From an MM perspective, zram is an opaque storage backend. zswap OTOH
is an explicit MM cache stage which may in the future make different
decisions than zram, be integrated into vmscan's LRU hierarchy
etc. And in theory, you could put zswap with fast compression in front
of a zram device with denser compression, right?

I agree zram should probably also have memory counters, but I think it
makes sense to recognize zswap as a unique MM layer.
Minchan Kim Nov. 10, 2021, 7:11 p.m. UTC | #5
Hi Johannes,

On Tue, Nov 02, 2021 at 11:06:17AM -0400, Johannes Weiner wrote:
> Hi Minchan,
> 
> Sorry about the delay, I'm just now getting back to these patches.
> 
> On Mon, Aug 30, 2021 at 11:49:59AM -0700, Minchan Kim wrote:
> > Hi Johannes,
> > 
> > On Thu, Aug 19, 2021 at 03:55:33PM -0400, Johannes Weiner wrote:
> > > Currently it requires poking at debugfs to figure out the size of the
> > > zswap cache size on a host. There are no counters for reads and writes
> > > against the cache. This makes it difficult to understand behavior on
> > > production systems.
> > > 
> > > Print zswap memory consumption in /proc/meminfo, count zswapouts and
> > > zswapins in /proc/vmstat.
> > > 
> > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> > > ---
> > >  fs/proc/meminfo.c             |  4 ++++
> > >  include/linux/swap.h          |  4 ++++
> > >  include/linux/vm_event_item.h |  4 ++++
> > >  mm/vmstat.c                   |  4 ++++
> > >  mm/zswap.c                    | 11 +++++------
> > >  5 files changed, 21 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> > > index 6fa761c9cc78..2dc474940691 100644
> > > --- a/fs/proc/meminfo.c
> > > +++ b/fs/proc/meminfo.c
> > > @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> > >  
> > >  	show_val_kb(m, "SwapTotal:      ", i.totalswap);
> > >  	show_val_kb(m, "SwapFree:       ", i.freeswap);
> > > +#ifdef CONFIG_ZSWAP
> > > +	seq_printf(m,  "Zswap:          %8lu kB\n",
> > > +		   (unsigned long)(zswap_pool_total_size >> 10));
> > 
> > Since we have zram as well as zswap, it would be great if
> > we can abstract both at once without introducing another
> > "Zram: " stuff in meminfo. A note: zram can support fs based on
> > on zram blk device as well as swap. Thus, term would be better
> > to say "compressed" rather than "swap".
> > 
> > How about this?
> > 
> > "Compressed: xx kB"
> 
> Wouldn't it make more sense to keep separate counters? Zswap and zram
> are quite different from each other.
> 
> From an MM perspective, zram is an opaque storage backend. zswap OTOH
> is an explicit MM cache stage which may in the future make different
> decisions than zram, be integrated into vmscan's LRU hierarchy
> etc. And in theory, you could put zswap with fast compression in front
> of a zram device with denser compression, right?

My view is the the allocators aims to store compressed memory.
Likewise slab allocator, we could use the allocator any places
and display the total memory usage from the allocator in meminfo
instead of each subsystem and look at slabinfo if we need further
break down. I think it could work for this case, too.

> 
> I agree zram should probably also have memory counters, but I think it
> makes sense to recognize zswap as a unique MM layer.

Under your view, I think it would introduce Zram-swap, Zram-block
as well as Zswap. If folks think it's better idea, I am fine and
happy to post a patch to merge it along with this patch.
diff mbox series

Patch

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6fa761c9cc78..2dc474940691 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -86,6 +86,10 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 
 	show_val_kb(m, "SwapTotal:      ", i.totalswap);
 	show_val_kb(m, "SwapFree:       ", i.freeswap);
+#ifdef CONFIG_ZSWAP
+	seq_printf(m,  "Zswap:          %8lu kB\n",
+		   (unsigned long)(zswap_pool_total_size >> 10));
+#endif
 	show_val_kb(m, "Dirty:          ",
 		    global_node_page_state(NR_FILE_DIRTY));
 	show_val_kb(m, "Writeback:      ",
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 144727041e78..3b23c88b6a8d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -696,6 +696,10 @@  static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 
+#ifdef CONFIG_ZSWAP
+extern u64 zswap_pool_total_size;
+#endif
+
 #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
 extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask);
 #else
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ae0dd1948c2b..9dbebea09c69 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -125,6 +125,10 @@  enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		SWAP_RA,
 		SWAP_RA_HIT,
 #endif
+#ifdef CONFIG_ZSWAP
+		ZSWPIN,
+		ZSWPOUT,
+#endif
 #ifdef CONFIG_X86
 		DIRECT_MAP_LEVEL2_SPLIT,
 		DIRECT_MAP_LEVEL3_SPLIT,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index cccee36b289c..31aada15c571 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1369,6 +1369,10 @@  const char * const vmstat_text[] = {
 	"swap_ra",
 	"swap_ra_hit",
 #endif
+#ifdef CONFIG_ZSWAP
+	"zswpin",
+	"zswpout",
+#endif
 #ifdef CONFIG_X86
 	"direct_map_level2_splits",
 	"direct_map_level3_splits",
diff --git a/mm/zswap.c b/mm/zswap.c
index 20763267a219..f93a7c715f76 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -40,7 +40,7 @@ 
 * statistics
 **********************************/
 /* Total bytes used by the compressed storage */
-static u64 zswap_pool_total_size;
+u64 zswap_pool_total_size;
 /* The number of compressed pages currently stored in zswap */
 static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
 /* The number of same-value filled pages currently stored in zswap */
@@ -1231,6 +1231,7 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	/* update stats */
 	atomic_inc(&zswap_stored_pages);
 	zswap_update_total_size();
+	count_vm_event(ZSWAPOUT);
 
 	return 0;
 
@@ -1273,11 +1274,10 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 		zswap_fill_page(dst, entry->value);
 		kunmap_atomic(dst);
 		ret = 0;
-		goto freeentry;
+		goto stats;
 	}
 
 	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
-
 		tmp = kmalloc(entry->length, GFP_ATOMIC);
 		if (!tmp) {
 			ret = -ENOMEM;
@@ -1292,10 +1292,8 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 		src += sizeof(struct zswap_header);
 
 	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
-
 		memcpy(tmp, src, entry->length);
 		src = tmp;
-
 		zpool_unmap_handle(entry->pool->zpool, entry->handle);
 	}
 
@@ -1314,7 +1312,8 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 		kfree(tmp);
 
 	BUG_ON(ret);
-
+stats:
+	count_vm_event(ZSWAPIN);
 freeentry:
 	spin_lock(&tree->lock);
 	zswap_entry_put(tree, entry);