diff mbox series

[v5,01/12] crypto: acomp - Add synchronous/asynchronous acomp request chaining.

Message ID 20241221063119.29140-2-kanchana.p.sridhar@intel.com (mailing list archive)
State Under Review
Delegated to: Herbert Xu
Headers show
Series zswap IAA compress batching | expand

Commit Message

Kanchana P Sridhar Dec. 21, 2024, 6:31 a.m. UTC
This patch is based on Herbert Xu's request chaining for ahash
("[PATCH 2/6] crypto: hash - Add request chaining API") [1]. The generic
framework for request chaining that's provided in the ahash implementation
has been used as reference to develop a similar synchronous request
chaining framework for crypto_acomp.

Furthermore, this commit develops an asynchronous request chaining
framework and API that iaa_crypto can use for request chaining with
parallelism, in order to fully benefit from Intel IAA's multiple
compress/decompress engines in hardware. This allows us to gain significant
latency improvements with IAA batching as compared to synchronous request
chaining.

 Usage of acomp request chaining API:
 ====================================

 Any crypto_acomp compressor can avail of request chaining as follows: by calling one of

 Step 1: Create request chain:

  Request 0 (the first req in the chain):

  void acomp_reqchain_init(struct acomp_req *req,
                           u32 flags, crypto_completion_t compl,
                           void *data);

  Subsequent requests:

  void acomp_request_chain(struct acomp_req *req,
                           struct acomp_req *head);

 Step 2: Process the request chain using the specified compress/decompress
         "op":

  2.a) Synchronous: the chain of requests is processed in series:

       int acomp_do_req_chain(struct acomp_req *req,
                              int (*op)(struct acomp_req *req));

  2.b) Asynchronous: the chain of requests is processed in parallel using a
       submit-poll paradigm:

       int acomp_do_async_req_chain(struct acomp_req *req,
                                    int (*op_submit)(struct acomp_req *req),
                                    int (*op_poll)(struct acomp_req *req));

Request chaining will be used in subsequent patches to implement
compress/decompress batching in the iaa_crypto driver for the two supported
IAA driver sync_modes:

  sync_mode = 'sync' will use (2.a),
  sync_mode = 'async' will use (2.b).

These files are directly re-used from [1] which is not yet merged:

include/crypto/algapi.h
include/linux/crypto.h

Hence, I am adding Herbert as the co-developer of this acomp request
chaining patch.

[1]: https://lore.kernel.org/linux-crypto/677614fbdc70b31df2e26483c8d2cd1510c8af91.1730021644.git.herbert@gondor.apana.org.au/

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by:
---
 crypto/acompress.c                  | 284 ++++++++++++++++++++++++++++
 include/crypto/acompress.h          |  41 ++++
 include/crypto/algapi.h             |  10 +
 include/crypto/internal/acompress.h |  10 +
 include/linux/crypto.h              |  31 +++
 5 files changed, 376 insertions(+)
diff mbox series

Patch

diff --git a/crypto/acompress.c b/crypto/acompress.c
index 6fdf0ff9f3c0..cb6444d09dd7 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -23,6 +23,19 @@  struct crypto_scomp;
 
 static const struct crypto_type crypto_acomp_type;
 
+struct acomp_save_req_state {
+	struct list_head head;
+	struct acomp_req *req0;
+	struct acomp_req *cur;
+	int (*op)(struct acomp_req *req);
+	crypto_completion_t compl;
+	void *data;
+};
+
+static void acomp_reqchain_done(void *data, int err);
+static int acomp_save_req(struct acomp_req *req, crypto_completion_t cplt);
+static void acomp_restore_req(struct acomp_req *req);
+
 static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
 {
 	return container_of(alg, struct acomp_alg, calg.base);
@@ -123,6 +136,277 @@  struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
 
+static int acomp_save_req(struct acomp_req *req, crypto_completion_t cplt)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_save_req_state *state;
+	gfp_t gfp;
+	u32 flags;
+
+	if (!acomp_is_async(tfm))
+		return 0;
+
+	flags = acomp_request_flags(req);
+	gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?  GFP_KERNEL : GFP_ATOMIC;
+	state = kmalloc(sizeof(*state), gfp);
+	if (!state)
+		return -ENOMEM;
+
+	state->compl = req->base.complete;
+	state->data = req->base.data;
+	state->req0 = req;
+
+	req->base.complete = cplt;
+	req->base.data = state;
+
+	return 0;
+}
+
+static void acomp_restore_req(struct acomp_req *req)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_save_req_state *state;
+
+	if (!acomp_is_async(tfm))
+		return;
+
+	state = req->base.data;
+
+	req->base.complete = state->compl;
+	req->base.data = state->data;
+	kfree(state);
+}
+
+static int acomp_reqchain_finish(struct acomp_save_req_state *state,
+				 int err, u32 mask)
+{
+	struct acomp_req *req0 = state->req0;
+	struct acomp_req *req = state->cur;
+	struct acomp_req *n;
+
+	req->base.err = err;
+
+	if (req == req0)
+		INIT_LIST_HEAD(&req->base.list);
+	else
+		list_add_tail(&req->base.list, &req0->base.list);
+
+	list_for_each_entry_safe(req, n, &state->head, base.list) {
+		list_del_init(&req->base.list);
+
+		req->base.flags &= mask;
+		req->base.complete = acomp_reqchain_done;
+		req->base.data = state;
+		state->cur = req;
+		err = state->op(req);
+
+		if (err == -EINPROGRESS) {
+			if (!list_empty(&state->head))
+				err = -EBUSY;
+			goto out;
+		}
+
+		if (err == -EBUSY)
+			goto out;
+
+		req->base.err = err;
+		list_add_tail(&req->base.list, &req0->base.list);
+	}
+
+	acomp_restore_req(req0);
+
+out:
+	return err;
+}
+
+static void acomp_reqchain_done(void *data, int err)
+{
+	struct acomp_save_req_state *state = data;
+	crypto_completion_t compl = state->compl;
+
+	data = state->data;
+
+	if (err == -EINPROGRESS) {
+		if (!list_empty(&state->head))
+			return;
+		goto notify;
+	}
+
+	err = acomp_reqchain_finish(state, err, CRYPTO_TFM_REQ_MAY_BACKLOG);
+	if (err == -EBUSY)
+		return;
+
+notify:
+	compl(data, err);
+}
+
+int acomp_do_req_chain(struct acomp_req *req,
+		       int (*op)(struct acomp_req *req))
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_save_req_state *state;
+	struct acomp_save_req_state state0;
+	int err = 0;
+
+	if (!acomp_request_chained(req) || list_empty(&req->base.list) ||
+	    !crypto_acomp_req_chain(tfm))
+		return op(req);
+
+	state = &state0;
+
+	if (acomp_is_async(tfm)) {
+		err = acomp_save_req(req, acomp_reqchain_done);
+		if (err) {
+			struct acomp_req *r2;
+
+			req->base.err = err;
+			list_for_each_entry(r2, &req->base.list, base.list)
+				r2->base.err = err;
+
+			return err;
+		}
+
+		state = req->base.data;
+	}
+
+	state->op = op;
+	state->cur = req;
+	INIT_LIST_HEAD(&state->head);
+	list_splice(&req->base.list, &state->head);
+
+	err = op(req);
+	if (err == -EBUSY || err == -EINPROGRESS)
+		return -EBUSY;
+
+	return acomp_reqchain_finish(state, err, ~0);
+}
+EXPORT_SYMBOL_GPL(acomp_do_req_chain);
+
+static void acomp_async_reqchain_done(struct acomp_req *req0,
+				      struct list_head *state,
+				      int (*op_poll)(struct acomp_req *req))
+{
+	struct acomp_req *req, *n;
+	bool req0_done = false;
+	int err;
+
+	while (!list_empty(state)) {
+
+		if (!req0_done) {
+			err = op_poll(req0);
+			if (!(err == -EAGAIN || err == -EINPROGRESS || err == -EBUSY)) {
+				req0->base.err = err;
+				req0_done = true;
+			}
+		}
+
+		list_for_each_entry_safe(req, n, state, base.list) {
+			err = op_poll(req);
+
+			if (err == -EAGAIN || err == -EINPROGRESS || err == -EBUSY)
+				continue;
+
+			req->base.err = err;
+			list_del_init(&req->base.list);
+			list_add_tail(&req->base.list, &req0->base.list);
+		}
+	}
+
+	while (!req0_done) {
+		err = op_poll(req0);
+		if (!(err == -EAGAIN || err == -EINPROGRESS || err == -EBUSY)) {
+			req0->base.err = err;
+			break;
+		}
+	}
+}
+
+static int acomp_async_reqchain_finish(struct acomp_req *req0,
+				       struct list_head *state,
+				       int (*op_submit)(struct acomp_req *req),
+				       int (*op_poll)(struct acomp_req *req))
+{
+	struct acomp_req *req, *n;
+	int err = 0;
+
+	INIT_LIST_HEAD(&req0->base.list);
+
+	list_for_each_entry_safe(req, n, state, base.list) {
+		BUG_ON(req == req0);
+
+		err = op_submit(req);
+
+		if (!(err == -EINPROGRESS || err == -EBUSY)) {
+			req->base.err = err;
+			list_del_init(&req->base.list);
+			list_add_tail(&req->base.list, &req0->base.list);
+		}
+	}
+
+	acomp_async_reqchain_done(req0, state, op_poll);
+
+	return req0->base.err;
+}
+
+int acomp_do_async_req_chain(struct acomp_req *req,
+			     int (*op_submit)(struct acomp_req *req),
+			     int (*op_poll)(struct acomp_req *req))
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct list_head state;
+	struct acomp_req *r2;
+	int err = 0;
+	void *req0_data = req->base.data;
+
+	if (!acomp_request_chained(req) || list_empty(&req->base.list) ||
+		!acomp_is_async(tfm) || !crypto_acomp_req_chain(tfm)) {
+
+		err = op_submit(req);
+
+		if (err == -EINPROGRESS || err == -EBUSY) {
+			bool req0_done = false;
+
+			while (!req0_done) {
+				err = op_poll(req);
+				if (!(err == -EAGAIN || err == -EINPROGRESS || err == -EBUSY)) {
+					req->base.err = err;
+					break;
+				}
+			}
+		} else {
+			req->base.err = err;
+		}
+
+		req->base.data = req0_data;
+		if (acomp_is_async(tfm))
+			req->base.complete(req->base.data, req->base.err);
+
+		return err;
+	}
+
+	err = op_submit(req);
+	req->base.err = err;
+
+	if (err && !(err == -EINPROGRESS || err == -EBUSY))
+		goto err_prop;
+
+	INIT_LIST_HEAD(&state);
+	list_splice(&req->base.list, &state);
+
+	err = acomp_async_reqchain_finish(req, &state, op_submit, op_poll);
+	req->base.data = req0_data;
+	req->base.complete(req->base.data, req->base.err);
+
+	return err;
+
+err_prop:
+	list_for_each_entry(r2, &req->base.list, base.list)
+		r2->base.err = err;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(acomp_do_async_req_chain);
+
 struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
 {
 	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index 54937b615239..eadc24514056 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -206,6 +206,7 @@  static inline void acomp_request_set_callback(struct acomp_req *req,
 	req->base.data = data;
 	req->base.flags &= CRYPTO_ACOMP_ALLOC_OUTPUT;
 	req->base.flags |= flgs & ~CRYPTO_ACOMP_ALLOC_OUTPUT;
+	req->base.flags &= ~CRYPTO_TFM_REQ_CHAIN;
 }
 
 /**
@@ -237,6 +238,46 @@  static inline void acomp_request_set_params(struct acomp_req *req,
 		req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
 }
 
+static inline u32 acomp_request_flags(struct acomp_req *req)
+{
+	return req->base.flags;
+}
+
+static inline void acomp_reqchain_init(struct acomp_req *req,
+				       u32 flags, crypto_completion_t compl,
+				       void *data)
+{
+	acomp_request_set_callback(req, flags, compl, data);
+	crypto_reqchain_init(&req->base);
+}
+
+static inline void acomp_reqchain_clear(struct acomp_req *req, void *data)
+{
+	struct crypto_wait *wait = (struct crypto_wait *)data;
+	reinit_completion(&wait->completion);
+	crypto_reqchain_clear(&req->base);
+	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   crypto_req_done, data);
+}
+
+static inline void acomp_request_chain(struct acomp_req *req,
+				       struct acomp_req *head)
+{
+	crypto_request_chain(&req->base, &head->base);
+}
+
+int acomp_do_req_chain(struct acomp_req *req,
+		       int (*op)(struct acomp_req *req));
+
+int acomp_do_async_req_chain(struct acomp_req *req,
+			     int (*op_submit)(struct acomp_req *req),
+			     int (*op_poll)(struct acomp_req *req));
+
+static inline int acomp_request_err(struct acomp_req *req)
+{
+	return req->base.err;
+}
+
 /**
  * crypto_acomp_compress() -- Invoke asynchronous compress operation
  *
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 156de41ca760..c5df380c7d08 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -271,4 +271,14 @@  static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
 }
 
+static inline bool crypto_request_chained(struct crypto_async_request *req)
+{
+	return req->flags & CRYPTO_TFM_REQ_CHAIN;
+}
+
+static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN;
+}
+
 #endif	/* _CRYPTO_ALGAPI_H */
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index 8831edaafc05..53b4ef59b48c 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -84,6 +84,16 @@  static inline void __acomp_request_free(struct acomp_req *req)
 	kfree_sensitive(req);
 }
 
+static inline bool acomp_request_chained(struct acomp_req *req)
+{
+	return crypto_request_chained(&req->base);
+}
+
+static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm)
+{
+	return crypto_tfm_req_chain(&tfm->base);
+}
+
 /**
  * crypto_register_acomp() -- Register asynchronous compression algorithm
  *
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index b164da5e129e..7c27d557586c 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -13,6 +13,8 @@ 
 #define _LINUX_CRYPTO_H
 
 #include <linux/completion.h>
+#include <linux/errno.h>
+#include <linux/list.h>
 #include <linux/refcount.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -124,6 +126,9 @@ 
  */
 #define CRYPTO_ALG_FIPS_INTERNAL	0x00020000
 
+/* Set if the algorithm supports request chains. */
+#define CRYPTO_ALG_REQ_CHAIN		0x00040000
+
 /*
  * Transform masks and values (for crt_flags).
  */
@@ -133,6 +138,7 @@ 
 #define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS	0x00000100
 #define CRYPTO_TFM_REQ_MAY_SLEEP	0x00000200
 #define CRYPTO_TFM_REQ_MAY_BACKLOG	0x00000400
+#define CRYPTO_TFM_REQ_CHAIN		0x00000800
 
 /*
  * Miscellaneous stuff.
@@ -174,6 +180,7 @@  struct crypto_async_request {
 	struct crypto_tfm *tfm;
 
 	u32 flags;
+	int err;
 };
 
 /**
@@ -540,5 +547,29 @@  int crypto_comp_decompress(struct crypto_comp *tfm,
 			   const u8 *src, unsigned int slen,
 			   u8 *dst, unsigned int *dlen);
 
+static inline void crypto_reqchain_init(struct crypto_async_request *req)
+{
+	req->err = -EINPROGRESS;
+	req->flags |= CRYPTO_TFM_REQ_CHAIN;
+	INIT_LIST_HEAD(&req->list);
+}
+
+static inline void crypto_reqchain_clear(struct crypto_async_request *req)
+{
+	req->flags &= ~CRYPTO_TFM_REQ_CHAIN;
+}
+
+static inline void crypto_request_chain(struct crypto_async_request *req,
+					struct crypto_async_request *head)
+{
+	req->err = -EINPROGRESS;
+	list_add_tail(&req->list, &head->list);
+}
+
+static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+}
+
 #endif	/* _LINUX_CRYPTO_H */