@@ -118,6 +118,9 @@ static inline void zswap_store_batch(struct swap_in_memory_cache_cb *simc)
else
__zswap_store_batch_single(simc);
}
+
+bool zswap_load_batching_enabled(void);
+
unsigned long zswap_total_pages(void);
bool zswap_store(struct folio *folio);
bool zswap_load(struct folio *folio);
@@ -145,6 +148,11 @@ static inline void zswap_store_batch(struct swap_in_memory_cache_cb *simc)
{
}
+static inline bool zswap_load_batching_enabled(void)
+{
+ return false; /* stub variant: load batching is always reported as disabled */
+}
+
static inline bool zswap_store(struct folio *folio)
{
return false;
@@ -137,6 +137,19 @@ config ZSWAP_STORE_BATCHING_ENABLED
in the folio in hardware, thereby improving large folio compression
throughput and reducing swapout latency.
+config ZSWAP_LOAD_BATCHING_ENABLED
+ bool "Batching of zswap loads of 4K folios with Intel IAA"
+ depends on ZSWAP && CRYPTO_DEV_IAA_CRYPTO
+ default n
+ help
+ Enables zswap_load() to swap in multiple 4K folios in batches of 8,
+ rather than one folio at a time, if the system has Intel IAA for
+ hardware acceleration of decompressions. swapin_readahead() is used to
+ prefetch a batch of folios to be swapped in along with the faulting
+ folio. If IAA is the zswap compressor, the decompression of up to 8
+ folios is parallelized in hardware, thereby reducing swapin and
+ do_swap_page() latency.
+
choice
prompt "Default allocator"
depends on ZSWAP
@@ -136,6 +136,13 @@ module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
static bool __zswap_store_batching_enabled = IS_ENABLED(
CONFIG_ZSWAP_STORE_BATCHING_ENABLED);
+/*
+ * Enable/disable batching of decompressions of multiple 4K folios, if
+ * the system has Intel IAA. The initial value is fixed at build time by
+ * CONFIG_ZSWAP_LOAD_BATCHING_ENABLED; callers query it through
+ * zswap_load_batching_enabled().
+ */
+static bool __zswap_load_batching_enabled = IS_ENABLED(
+ CONFIG_ZSWAP_LOAD_BATCHING_ENABLED);
+
bool zswap_is_enabled(void)
{
return zswap_enabled;
@@ -246,6 +253,11 @@ __always_inline bool zswap_store_batching_enabled(void)
return __zswap_store_batching_enabled;
}
+/*
+ * zswap_load_batching_enabled - report whether zswap load batching is on.
+ *
+ * Returns the current value of __zswap_load_batching_enabled.
+ *
+ * Defined out of line with external linkage so that it matches its plain
+ * "bool zswap_load_batching_enabled(void)" prototype; callers outside this
+ * file reach it through an ordinary function call, so no forced-inline
+ * attribute is applied here.
+ */
+bool zswap_load_batching_enabled(void)
+{
+ return __zswap_load_batching_enabled;
+}
+
static void __zswap_store_batch_core(
int node_id,
struct folio **folios,
Add a new zswap config variable that controls whether zswap load will
decompress a batch of 4K folios, for instance, the folios prefetched
during swapin_readahead():

  CONFIG_ZSWAP_LOAD_BATCHING_ENABLED

The existing CONFIG_CRYPTO_DEV_IAA_CRYPTO variable added in commit
ea7a5cbb4369 ("crypto: iaa - Add Intel IAA Compression Accelerator crypto
driver core") is used to detect if the system has the Intel Analytics
Accelerator (IAA), and that the iaa_crypto module is available. If so, the
kernel build will prompt for CONFIG_ZSWAP_LOAD_BATCHING_ENABLED. Hence,
users have the ability to set CONFIG_ZSWAP_LOAD_BATCHING_ENABLED="y" only
on systems that have Intel IAA.

If CONFIG_ZSWAP_LOAD_BATCHING_ENABLED is enabled, and IAA is configured as
the zswap compressor, the vm.page-cluster sysctl is used to prefetch up to
32 4K folios using swapin_readahead(). The readahead folios present in
zswap are then loaded as a batch using IAA decompression batching.

The patch also implements a zswap API that returns the status of this
config variable.

Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
---
 include/linux/zswap.h |  8 ++++++++
 mm/Kconfig            | 13 +++++++++++++
 mm/zswap.c            | 12 ++++++++++++
 3 files changed, 33 insertions(+)