diff mbox series

[RFC,v1,1/7] mm: zswap: Config variable to enable zswap loads with decompress batching.

Message ID 20241018064805.336490-2-kanchana.p.sridhar@intel.com (mailing list archive)
State New
Headers show
Series zswap IAA decompress batching | expand

Commit Message

Sridhar, Kanchana P Oct. 18, 2024, 6:47 a.m. UTC
Add a new zswap config variable that controls whether zswap load will
decompress a batch of 4K folios, for instance, the folios prefetched
during swapin_readahead():

  CONFIG_ZSWAP_LOAD_BATCHING_ENABLED

The existing CONFIG_CRYPTO_DEV_IAA_CRYPTO variable added in commit
ea7a5cbb4369 ("crypto: iaa - Add Intel IAA Compression Accelerator crypto
driver core") is used to detect whether the system has the Intel Analytics
Accelerator (IAA) and whether the iaa_crypto module is available. If so,
the kernel build will prompt for CONFIG_ZSWAP_LOAD_BATCHING_ENABLED. Hence,
users can set CONFIG_ZSWAP_LOAD_BATCHING_ENABLED=y only on systems that
have Intel IAA.

If CONFIG_ZSWAP_LOAD_BATCHING_ENABLED is enabled, and IAA is configured
as the zswap compressor, the vm.page-cluster sysctl is used to prefetch up
to 32 4K folios using swapin_readahead(). The readahead folios present in
zswap are then loaded as a batch using IAA decompression batching.

The patch also implements a zswap API that returns the status of this
config variable.

Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
---
 include/linux/zswap.h |  8 ++++++++
 mm/Kconfig            | 13 +++++++++++++
 mm/zswap.c            | 12 ++++++++++++
 3 files changed, 33 insertions(+)
diff mbox series

Patch

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 328a1e09d502..294d13efbfb1 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -118,6 +118,9 @@  static inline void zswap_store_batch(struct swap_in_memory_cache_cb *simc)
 	else
 		__zswap_store_batch_single(simc);
 }
+
+bool zswap_load_batching_enabled(void);
+
 unsigned long zswap_total_pages(void);
 bool zswap_store(struct folio *folio);
 bool zswap_load(struct folio *folio);
@@ -145,6 +148,11 @@  static inline void zswap_store_batch(struct swap_in_memory_cache_cb *simc)
 {
 }
 
+static inline bool zswap_load_batching_enabled(void)
+{
+	return false;
+}
+
 static inline bool zswap_store(struct folio *folio)
 {
 	return false;
diff --git a/mm/Kconfig b/mm/Kconfig
index 26d1a5cee471..98e46a3cf0e3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -137,6 +137,19 @@  config ZSWAP_STORE_BATCHING_ENABLED
 	in the folio in	hardware, thereby improving large folio compression
 	throughput and reducing swapout latency.
 
+config ZSWAP_LOAD_BATCHING_ENABLED
+	bool "Batching of zswap loads of 4K folios with Intel IAA"
+	depends on ZSWAP && CRYPTO_DEV_IAA_CRYPTO
+	default n
+	help
+	Enables zswap_load to swap in multiple 4K folios in batches of 8,
+	rather than a folio at a time, if the system has Intel IAA for hardware
+	acceleration of decompressions. swapin_readahead will be used to
+	prefetch a batch of folios to be swapped in along with the faulting
+	folio. If IAA is the zswap compressor, this will parallelize batch
+	decompression of up to 8 folios in hardware, thereby reducing swapin
+	and do_swap_page latency.
+
 choice
 	prompt "Default allocator"
 	depends on ZSWAP
diff --git a/mm/zswap.c b/mm/zswap.c
index 68ce498ad000..fe7bc2a6672e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -136,6 +136,13 @@  module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
 static bool __zswap_store_batching_enabled = IS_ENABLED(
 	CONFIG_ZSWAP_STORE_BATCHING_ENABLED);
 
+/*
+ * Enable/disable batching of decompressions of multiple 4K folios, if
+ * the system has Intel IAA.
+ */
+static bool __zswap_load_batching_enabled = IS_ENABLED(
+	CONFIG_ZSWAP_LOAD_BATCHING_ENABLED);
+
 bool zswap_is_enabled(void)
 {
 	return zswap_enabled;
@@ -246,6 +253,11 @@  __always_inline bool zswap_store_batching_enabled(void)
 	return __zswap_store_batching_enabled;
 }
 
+__always_inline bool zswap_load_batching_enabled(void)
+{
+	return __zswap_load_batching_enabled;
+}
+
 static void __zswap_store_batch_core(
 	int node_id,
 	struct folio **folios,