Message ID | 20210819195533.211756-4-hannes@cmpxchg.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/4] mm: Kconfig: move swap and slab config options to the MM section | expand |
+CC zswap maintainers On 8/19/21 21:55, Johannes Weiner wrote: > Currently it requires poking at debugfs to figure out the size of the > zswap cache size on a host. There are no counters for reads and writes > against the cache. This makes it difficult to understand behavior on > production systems. > > Print zswap memory consumption in /proc/meminfo, count zswapouts and > zswapins in /proc/vmstat. > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> > --- > fs/proc/meminfo.c | 4 ++++ > include/linux/swap.h | 4 ++++ > include/linux/vm_event_item.h | 4 ++++ > mm/vmstat.c | 4 ++++ > mm/zswap.c | 11 +++++------ > 5 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > index 6fa761c9cc78..2dc474940691 100644 > --- a/fs/proc/meminfo.c > +++ b/fs/proc/meminfo.c > @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > > show_val_kb(m, "SwapTotal: ", i.totalswap); > show_val_kb(m, "SwapFree: ", i.freeswap); > +#ifdef CONFIG_ZSWAP > + seq_printf(m, "Zswap: %8lu kB\n", > + (unsigned long)(zswap_pool_total_size >> 10)); > +#endif > show_val_kb(m, "Dirty: ", > global_node_page_state(NR_FILE_DIRTY)); > show_val_kb(m, "Writeback: ", > diff --git a/include/linux/swap.h b/include/linux/swap.h > index 144727041e78..3b23c88b6a8d 100644 > --- a/include/linux/swap.h > +++ b/include/linux/swap.h > @@ -696,6 +696,10 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) > } > #endif > > +#ifdef CONFIG_ZSWAP > +extern u64 zswap_pool_total_size; > +#endif > + > #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) > extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); > #else > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > index ae0dd1948c2b..9dbebea09c69 100644 > --- a/include/linux/vm_event_item.h > +++ b/include/linux/vm_event_item.h > @@ -125,6 +125,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > SWAP_RA, > SWAP_RA_HIT, > #endif > +#ifdef CONFIG_ZSWAP > + ZSWPIN, > + ZSWPOUT, > +#endif > #ifdef CONFIG_X86 > DIRECT_MAP_LEVEL2_SPLIT, > DIRECT_MAP_LEVEL3_SPLIT, > diff --git a/mm/vmstat.c b/mm/vmstat.c > index cccee36b289c..31aada15c571 100644 > --- a/mm/vmstat.c > +++ b/mm/vmstat.c > @@ -1369,6 +1369,10 @@ const char * const vmstat_text[] = { > "swap_ra", > "swap_ra_hit", > #endif > +#ifdef CONFIG_ZSWAP > + "zswpin", > + "zswpout", > +#endif > #ifdef CONFIG_X86 > "direct_map_level2_splits", > "direct_map_level3_splits", > diff --git a/mm/zswap.c b/mm/zswap.c > index 20763267a219..f93a7c715f76 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -40,7 +40,7 @@ > * statistics > **********************************/ > /* Total bytes used by the compressed storage */ > -static u64 zswap_pool_total_size; > +u64 zswap_pool_total_size; > /* The number of compressed pages currently stored in zswap */ > static atomic_t zswap_stored_pages = ATOMIC_INIT(0); > /* The number of same-value filled pages currently stored in zswap */ > @@ -1231,6 +1231,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, > /* update stats */ > atomic_inc(&zswap_stored_pages); > zswap_update_total_size(); > + count_vm_event(ZSWAPOUT); > > return 0; > > @@ -1273,11 +1274,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, > zswap_fill_page(dst, entry->value); > kunmap_atomic(dst); > ret = 0; > - goto freeentry; > + goto stats; > } > > if (!zpool_can_sleep_mapped(entry->pool->zpool)) { > - > tmp = kmalloc(entry->length, GFP_ATOMIC); > if (!tmp) { > ret = -ENOMEM; > @@ -1292,10 +1292,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, > src += sizeof(struct zswap_header); > > if (!zpool_can_sleep_mapped(entry->pool->zpool)) { > - > memcpy(tmp, src, entry->length); > src = tmp; > - > zpool_unmap_handle(entry->pool->zpool, entry->handle); > } > > @@ -1314,7 +1312,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, > kfree(tmp); > > BUG_ON(ret); > - > +stats: > + count_vm_event(ZSWAPIN); > freeentry: > spin_lock(&tree->lock); > zswap_entry_put(tree, entry); >
Hi Johannes, I love your patch! Yet something to improve: [auto build test ERROR on linux/master] [cannot apply to hnaz-linux-mm/master block/for-next linus/master v5.14 next-20210830] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Johannes-Weiner/mm-Kconfig-move-swap-and-slab-config-options-to-the-MM-section/20210820-035613 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 config: x86_64-rhel-8.3 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/216a0ba919927dccd2dd26d7af1e395f4360002f git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Johannes-Weiner/mm-Kconfig-move-swap-and-slab-config-options-to-the-MM-section/20210820-035613 git checkout 216a0ba919927dccd2dd26d7af1e395f4360002f # save the attached .config to linux build tree mkdir build_dir make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): mm/zswap.c: In function 'zswap_frontswap_store': >> mm/zswap.c:1234:17: error: 'ZSWAPOUT' undeclared (first use in this function); did you mean 'ZSWPOUT'? 1234 | count_vm_event(ZSWAPOUT); | ^~~~~~~~ | ZSWPOUT mm/zswap.c:1234:17: note: each undeclared identifier is reported only once for each function it appears in mm/zswap.c: In function 'zswap_frontswap_load': >> mm/zswap.c:1316:17: error: 'ZSWAPIN' undeclared (first use in this function); did you mean 'ZSWPIN'? 1316 | count_vm_event(ZSWAPIN); | ^~~~~~~ | ZSWPIN vim +1234 mm/zswap.c 1080 1081 /********************************* 1082 * frontswap hooks 1083 **********************************/ 1084 /* attempts to compress and store an single page */ 1085 static int zswap_frontswap_store(unsigned type, pgoff_t offset, 1086 struct page *page) 1087 { 1088 struct zswap_tree *tree = zswap_trees[type]; 1089 struct zswap_entry *entry, *dupentry; 1090 struct scatterlist input, output; 1091 struct crypto_acomp_ctx *acomp_ctx; 1092 int ret; 1093 unsigned int hlen, dlen = PAGE_SIZE; 1094 unsigned long handle, value; 1095 char *buf; 1096 u8 *src, *dst; 1097 struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) }; 1098 gfp_t gfp; 1099 1100 /* THP isn't supported */ 1101 if (PageTransHuge(page)) { 1102 ret = -EINVAL; 1103 goto reject; 1104 } 1105 1106 if (!zswap_enabled || !tree) { 1107 ret = -ENODEV; 1108 goto reject; 1109 } 1110 1111 /* reclaim space if needed */ 1112 if (zswap_is_full()) { 1113 struct zswap_pool *pool; 1114 1115 zswap_pool_limit_hit++; 1116 zswap_pool_reached_full = true; 1117 pool = zswap_pool_last_get(); 1118 if (pool) 1119 queue_work(shrink_wq, &pool->shrink_work); 1120 ret = -ENOMEM; 1121 goto reject; 1122 } 1123 1124 if (zswap_pool_reached_full) { 1125 if (!zswap_can_accept()) { 1126 ret = -ENOMEM; 1127 goto reject; 1128 } else 1129 zswap_pool_reached_full = false; 1130 } 1131 1132 /* allocate entry */ 1133 entry = zswap_entry_cache_alloc(GFP_KERNEL); 1134 if (!entry) { 1135 zswap_reject_kmemcache_fail++; 1136 ret = -ENOMEM; 1137 goto reject; 1138 } 1139 1140 if (zswap_same_filled_pages_enabled) { 1141 src = kmap_atomic(page); 1142 if (zswap_is_page_same_filled(src, &value)) { 1143 kunmap_atomic(src); 1144 entry->offset = offset; 1145 entry->length = 0; 1146 entry->value = value; 1147 atomic_inc(&zswap_same_filled_pages); 1148 goto insert_entry; 1149 } 1150 kunmap_atomic(src); 1151 } 1152 1153 /* if entry is successfully added, it keeps the reference */ 1154 entry->pool = zswap_pool_current_get(); 1155 if (!entry->pool) { 1156 ret = -EINVAL; 1157 goto freepage; 1158 } 1159 1160 /* compress */ 1161 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); 1162 1163 mutex_lock(acomp_ctx->mutex); 1164 1165 dst = acomp_ctx->dstmem; 1166 sg_init_table(&input, 1); 1167 sg_set_page(&input, page, PAGE_SIZE, 0); 1168 1169 /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */ 1170 sg_init_one(&output, dst, PAGE_SIZE * 2); 1171 acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); 1172 /* 1173 * it maybe looks a little bit silly that we send an asynchronous request, 1174 * then wait for its completion synchronously. This makes the process look 1175 * synchronous in fact. 1176 * Theoretically, acomp supports users send multiple acomp requests in one 1177 * acomp instance, then get those requests done simultaneously. but in this 1178 * case, frontswap actually does store and load page by page, there is no 1179 * existing method to send the second page before the first page is done 1180 * in one thread doing frontswap. 1181 * but in different threads running on different cpu, we have different 1182 * acomp instance, so multiple threads can do (de)compression in parallel. 1183 */ 1184 ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); 1185 dlen = acomp_ctx->req->dlen; 1186 1187 if (ret) { 1188 ret = -EINVAL; 1189 goto put_dstmem; 1190 } 1191 1192 /* store */ 1193 hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0; 1194 gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; 1195 if (zpool_malloc_support_movable(entry->pool->zpool)) 1196 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; 1197 ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle); 1198 if (ret == -ENOSPC) { 1199 zswap_reject_compress_poor++; 1200 goto put_dstmem; 1201 } 1202 if (ret) { 1203 zswap_reject_alloc_fail++; 1204 goto put_dstmem; 1205 } 1206 buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW); 1207 memcpy(buf, &zhdr, hlen); 1208 memcpy(buf + hlen, dst, dlen); 1209 zpool_unmap_handle(entry->pool->zpool, handle); 1210 mutex_unlock(acomp_ctx->mutex); 1211 1212 /* populate entry */ 1213 entry->offset = offset; 1214 entry->handle = handle; 1215 entry->length = dlen; 1216 1217 insert_entry: 1218 /* map */ 1219 spin_lock(&tree->lock); 1220 do { 1221 ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry); 1222 if (ret == -EEXIST) { 1223 zswap_duplicate_entry++; 1224 /* remove from rbtree */ 1225 zswap_rb_erase(&tree->rbroot, dupentry); 1226 zswap_entry_put(tree, dupentry); 1227 } 1228 } while (ret == -EEXIST); 1229 spin_unlock(&tree->lock); 1230 1231 /* update stats */ 1232 atomic_inc(&zswap_stored_pages); 1233 zswap_update_total_size(); > 1234 count_vm_event(ZSWAPOUT); 1235 1236 return 0; 1237 1238 put_dstmem: 1239 mutex_unlock(acomp_ctx->mutex); 1240 zswap_pool_put(entry->pool); 1241 freepage: 1242 zswap_entry_cache_free(entry); 1243 reject: 1244 return ret; 1245 } 1246 1247 /* 1248 * returns 0 if the page was successfully decompressed 1249 * return -1 on entry not found or error 1250 */ 1251 static int zswap_frontswap_load(unsigned type, pgoff_t offset, 1252 struct page *page) 1253 { 1254 struct zswap_tree *tree = zswap_trees[type]; 1255 struct zswap_entry *entry; 1256 struct scatterlist input, output; 1257 struct crypto_acomp_ctx *acomp_ctx; 1258 u8 *src, *dst, *tmp; 1259 unsigned int dlen; 1260 int ret; 1261 1262 /* find */ 1263 spin_lock(&tree->lock); 1264 entry = zswap_entry_find_get(&tree->rbroot, offset); 1265 if (!entry) { 1266 /* entry was written back */ 1267 spin_unlock(&tree->lock); 1268 return -1; 1269 } 1270 spin_unlock(&tree->lock); 1271 1272 if (!entry->length) { 1273 dst = kmap_atomic(page); 1274 zswap_fill_page(dst, entry->value); 1275 kunmap_atomic(dst); 1276 ret = 0; 1277 goto stats; 1278 } 1279 1280 if (!zpool_can_sleep_mapped(entry->pool->zpool)) { 1281 tmp = kmalloc(entry->length, GFP_ATOMIC); 1282 if (!tmp) { 1283 ret = -ENOMEM; 1284 goto freeentry; 1285 } 1286 } 1287 1288 /* decompress */ 1289 dlen = PAGE_SIZE; 1290 src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO); 1291 if (zpool_evictable(entry->pool->zpool)) 1292 src += sizeof(struct zswap_header); 1293 1294 if (!zpool_can_sleep_mapped(entry->pool->zpool)) { 1295 memcpy(tmp, src, entry->length); 1296 src = tmp; 1297 zpool_unmap_handle(entry->pool->zpool, entry->handle); 1298 } 1299 1300 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); 1301 mutex_lock(acomp_ctx->mutex); 1302 sg_init_one(&input, src, entry->length); 1303 sg_init_table(&output, 1); 1304 sg_set_page(&output, page, PAGE_SIZE, 0); 1305 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen); 1306 ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); 1307 mutex_unlock(acomp_ctx->mutex); 1308 1309 if (zpool_can_sleep_mapped(entry->pool->zpool)) 1310 zpool_unmap_handle(entry->pool->zpool, entry->handle); 1311 else 1312 kfree(tmp); 1313 1314 BUG_ON(ret); 1315 stats: > 1316 count_vm_event(ZSWAPIN); 1317 freeentry: 1318 spin_lock(&tree->lock); 1319 zswap_entry_put(tree, entry); 1320 spin_unlock(&tree->lock); 1321 1322 return ret; 1323 } 1324 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Johannes, On Thu, Aug 19, 2021 at 03:55:33PM -0400, Johannes Weiner wrote: > Currently it requires poking at debugfs to figure out the size of the > zswap cache size on a host. There are no counters for reads and writes > against the cache. This makes it difficult to understand behavior on > production systems. > > Print zswap memory consumption in /proc/meminfo, count zswapouts and > zswapins in /proc/vmstat. > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> > --- > fs/proc/meminfo.c | 4 ++++ > include/linux/swap.h | 4 ++++ > include/linux/vm_event_item.h | 4 ++++ > mm/vmstat.c | 4 ++++ > mm/zswap.c | 11 +++++------ > 5 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > index 6fa761c9cc78..2dc474940691 100644 > --- a/fs/proc/meminfo.c > +++ b/fs/proc/meminfo.c > @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > > show_val_kb(m, "SwapTotal: ", i.totalswap); > show_val_kb(m, "SwapFree: ", i.freeswap); > +#ifdef CONFIG_ZSWAP > + seq_printf(m, "Zswap: %8lu kB\n", > + (unsigned long)(zswap_pool_total_size >> 10)); Since we have zram as well as zswap, it would be great if we can abstract both at once without introducing another "Zram: " stuff in meminfo. A note: zram can support fs based on on zram blk device as well as swap. Thus, term would be better to say "compressed" rather than "swap". How about this? "Compressed: xx kB" unsigined long total_compressed_memory(void) { return zswap_compressed_mem() + zram_comporessed_mem(); } > +#endif > show_val_kb(m, "Dirty: ", > global_node_page_state(NR_FILE_DIRTY)); > show_val_kb(m, "Writeback: ", > diff --git a/include/linux/swap.h b/include/linux/swap.h > index 144727041e78..3b23c88b6a8d 100644 > --- a/include/linux/swap.h > +++ b/include/linux/swap.h > @@ -696,6 +696,10 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) > } > #endif > > +#ifdef CONFIG_ZSWAP > +extern u64 zswap_pool_total_size; > +#endif > + > #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) > extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); > #else > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > index ae0dd1948c2b..9dbebea09c69 100644 > --- a/include/linux/vm_event_item.h > +++ b/include/linux/vm_event_item.h > @@ -125,6 +125,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > SWAP_RA, > SWAP_RA_HIT, > #endif > +#ifdef CONFIG_ZSWAP > + ZSWPIN, > + ZSWPOUT, INMEM_SWP[IN|OUT] to represent both zram and zswap ? Feel free to suggest better word. > +#endif > #ifdef CONFIG_X86 > DIRECT_MAP_LEVEL2_SPLIT, > DIRECT_MAP_LEVEL3_SPLIT, > diff --git a/mm/vmstat.c b/mm/vmstat.c > index cccee36b289c..31aada15c571 100644 > --- a/mm/vmstat.c > +++ b/mm/vmstat.c > @@ -1369,6 +1369,10 @@ const char * const vmstat_text[] = { > "swap_ra", > "swap_ra_hit", > #endif > +#ifdef CONFIG_ZSWAP > + "zswpin", > + "zswpout", > +#endif > #ifdef CONFIG_X86 > "direct_map_level2_splits", > "direct_map_level3_splits", > diff --git a/mm/zswap.c b/mm/zswap.c > index 20763267a219..f93a7c715f76 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -40,7 +40,7 @@ > * statistics > **********************************/ > /* Total bytes used by the compressed storage */ > -static u64 zswap_pool_total_size; > +u64 zswap_pool_total_size; > /* The number of compressed pages currently stored in zswap */ > static atomic_t zswap_stored_pages = ATOMIC_INIT(0); > /* The number of same-value filled pages currently stored in zswap */ > @@ -1231,6 +1231,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, > /* update stats */ > atomic_inc(&zswap_stored_pages); > zswap_update_total_size(); > + count_vm_event(ZSWAPOUT); > > return 0; > > @@ -1273,11 +1274,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, > zswap_fill_page(dst, entry->value); > kunmap_atomic(dst); > ret = 0; > - goto freeentry; > + goto stats; > } > > if (!zpool_can_sleep_mapped(entry->pool->zpool)) { > - > tmp = kmalloc(entry->length, GFP_ATOMIC); > if (!tmp) { > ret = -ENOMEM; > @@ -1292,10 +1292,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, > src += sizeof(struct zswap_header); > > if (!zpool_can_sleep_mapped(entry->pool->zpool)) { > - > memcpy(tmp, src, entry->length); > src = tmp; > - > zpool_unmap_handle(entry->pool->zpool, entry->handle); > } > > @@ -1314,7 +1312,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, > kfree(tmp); > > BUG_ON(ret); > - > +stats: > + count_vm_event(ZSWAPIN); > freeentry: > spin_lock(&tree->lock); > zswap_entry_put(tree, entry); > -- > 2.32.0 > >
Hi Minchan, Sorry about the delay, I'm just now getting back to these patches. On Mon, Aug 30, 2021 at 11:49:59AM -0700, Minchan Kim wrote: > Hi Johannes, > > On Thu, Aug 19, 2021 at 03:55:33PM -0400, Johannes Weiner wrote: > > Currently it requires poking at debugfs to figure out the size of the > > zswap cache size on a host. There are no counters for reads and writes > > against the cache. This makes it difficult to understand behavior on > > production systems. > > > > Print zswap memory consumption in /proc/meminfo, count zswapouts and > > zswapins in /proc/vmstat. > > > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> > > --- > > fs/proc/meminfo.c | 4 ++++ > > include/linux/swap.h | 4 ++++ > > include/linux/vm_event_item.h | 4 ++++ > > mm/vmstat.c | 4 ++++ > > mm/zswap.c | 11 +++++------ > > 5 files changed, 21 insertions(+), 6 deletions(-) > > > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > > index 6fa761c9cc78..2dc474940691 100644 > > --- a/fs/proc/meminfo.c > > +++ b/fs/proc/meminfo.c > > @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > > > > show_val_kb(m, "SwapTotal: ", i.totalswap); > > show_val_kb(m, "SwapFree: ", i.freeswap); > > +#ifdef CONFIG_ZSWAP > > + seq_printf(m, "Zswap: %8lu kB\n", > > + (unsigned long)(zswap_pool_total_size >> 10)); > > Since we have zram as well as zswap, it would be great if > we can abstract both at once without introducing another > "Zram: " stuff in meminfo. A note: zram can support fs based on > on zram blk device as well as swap. Thus, term would be better > to say "compressed" rather than "swap". > > How about this? > > "Compressed: xx kB" Wouldn't it make more sense to keep separate counters? Zswap and zram are quite different from each other. From an MM perspective, zram is an opaque storage backend. zswap OTOH is an explicit MM cache stage which may in the future make different decisions than zram, be integrated into vmscan's LRU hierarchy etc. And in theory, you could put zswap with fast compression in front of a zram device with denser compression, right? I agree zram should probably also have memory counters, but I think it makes sense to recognize zswap as a unique MM layer.
Hi Johannes, On Tue, Nov 02, 2021 at 11:06:17AM -0400, Johannes Weiner wrote: > Hi Minchan, > > Sorry about the delay, I'm just now getting back to these patches. > > On Mon, Aug 30, 2021 at 11:49:59AM -0700, Minchan Kim wrote: > > Hi Johannes, > > > > On Thu, Aug 19, 2021 at 03:55:33PM -0400, Johannes Weiner wrote: > > > Currently it requires poking at debugfs to figure out the size of the > > > zswap cache size on a host. There are no counters for reads and writes > > > against the cache. This makes it difficult to understand behavior on > > > production systems. > > > > > > Print zswap memory consumption in /proc/meminfo, count zswapouts and > > > zswapins in /proc/vmstat. > > > > > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> > > > --- > > > fs/proc/meminfo.c | 4 ++++ > > > include/linux/swap.h | 4 ++++ > > > include/linux/vm_event_item.h | 4 ++++ > > > mm/vmstat.c | 4 ++++ > > > mm/zswap.c | 11 +++++------ > > > 5 files changed, 21 insertions(+), 6 deletions(-) > > > > > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > > > index 6fa761c9cc78..2dc474940691 100644 > > > --- a/fs/proc/meminfo.c > > > +++ b/fs/proc/meminfo.c > > > @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > > > > > > show_val_kb(m, "SwapTotal: ", i.totalswap); > > > show_val_kb(m, "SwapFree: ", i.freeswap); > > > +#ifdef CONFIG_ZSWAP > > > + seq_printf(m, "Zswap: %8lu kB\n", > > > + (unsigned long)(zswap_pool_total_size >> 10)); > > > > Since we have zram as well as zswap, it would be great if > > we can abstract both at once without introducing another > > "Zram: " stuff in meminfo. A note: zram can support fs based on > > on zram blk device as well as swap. Thus, term would be better > > to say "compressed" rather than "swap". > > > > How about this? > > > > "Compressed: xx kB" > > Wouldn't it make more sense to keep separate counters? Zswap and zram > are quite different from each other. > > From an MM perspective, zram is an opaque storage backend. zswap OTOH > is an explicit MM cache stage which may in the future make different > decisions than zram, be integrated into vmscan's LRU hierarchy > etc. And in theory, you could put zswap with fast compression in front > of a zram device with denser compression, right? My view is the the allocators aims to store compressed memory. Likewise slab allocator, we could use the allocator any places and display the total memory usage from the allocator in meminfo instead of each subsystem and look at slabinfo if we need further break down. I think it could work for this case, too. > > I agree zram should probably also have memory counters, but I think it > makes sense to recognize zswap as a unique MM layer. Under your view, I think it would introduce Zram-swap, Zram-block as well as Zswap. If folks think it's better idea, I am fine and happy to post a patch to merge it along with this patch.
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 6fa761c9cc78..2dc474940691 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -86,6 +86,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "SwapTotal: ", i.totalswap); show_val_kb(m, "SwapFree: ", i.freeswap); +#ifdef CONFIG_ZSWAP + seq_printf(m, "Zswap: %8lu kB\n", + (unsigned long)(zswap_pool_total_size >> 10)); +#endif show_val_kb(m, "Dirty: ", global_node_page_state(NR_FILE_DIRTY)); show_val_kb(m, "Writeback: ", diff --git a/include/linux/swap.h b/include/linux/swap.h index 144727041e78..3b23c88b6a8d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -696,6 +696,10 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif +#ifdef CONFIG_ZSWAP +extern u64 zswap_pool_total_size; +#endif + #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); #else diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index ae0dd1948c2b..9dbebea09c69 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -125,6 +125,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, SWAP_RA, SWAP_RA_HIT, #endif +#ifdef CONFIG_ZSWAP + ZSWPIN, + ZSWPOUT, +#endif #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, diff --git a/mm/vmstat.c b/mm/vmstat.c index cccee36b289c..31aada15c571 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1369,6 +1369,10 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif +#ifdef CONFIG_ZSWAP + "zswpin", + "zswpout", +#endif #ifdef CONFIG_X86 "direct_map_level2_splits", "direct_map_level3_splits", diff --git a/mm/zswap.c b/mm/zswap.c index 20763267a219..f93a7c715f76 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -40,7 +40,7 @@ * statistics **********************************/ /* Total bytes used by the compressed storage */ -static u64 zswap_pool_total_size; +u64 zswap_pool_total_size; /* The number of compressed pages currently stored in zswap */ static atomic_t zswap_stored_pages = ATOMIC_INIT(0); /* The number of same-value filled pages currently stored in zswap */ @@ -1231,6 +1231,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, /* update stats */ atomic_inc(&zswap_stored_pages); zswap_update_total_size(); + count_vm_event(ZSWAPOUT); return 0; @@ -1273,11 +1274,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, zswap_fill_page(dst, entry->value); kunmap_atomic(dst); ret = 0; - goto freeentry; + goto stats; } if (!zpool_can_sleep_mapped(entry->pool->zpool)) { - tmp = kmalloc(entry->length, GFP_ATOMIC); if (!tmp) { ret = -ENOMEM; @@ -1292,10 +1292,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, src += sizeof(struct zswap_header); if (!zpool_can_sleep_mapped(entry->pool->zpool)) { - memcpy(tmp, src, entry->length); src = tmp; - zpool_unmap_handle(entry->pool->zpool, entry->handle); } @@ -1314,7 +1312,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, kfree(tmp); BUG_ON(ret); - +stats: + count_vm_event(ZSWAPIN); freeentry: spin_lock(&tree->lock); zswap_entry_put(tree, entry);
Currently it requires poking at debugfs to figure out the size of the zswap cache size on a host. There are no counters for reads and writes against the cache. This makes it difficult to understand behavior on production systems. Print zswap memory consumption in /proc/meminfo, count zswapouts and zswapins in /proc/vmstat. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> --- fs/proc/meminfo.c | 4 ++++ include/linux/swap.h | 4 ++++ include/linux/vm_event_item.h | 4 ++++ mm/vmstat.c | 4 ++++ mm/zswap.c | 11 +++++------ 5 files changed, 21 insertions(+), 6 deletions(-)