Message ID | 20220219230151.56282-2-sebastian@breakpoint.cc (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | trace-cmd library: Add ZSTD support. | expand |
On Sun, Feb 20, 2022 at 1:02 AM Sebastian Andrzej Siewior <sebastian@breakpoint.cc> wrote: > Hi Sebastian, Thank you for contributing zstd support for trace-cmd! The zlib was chosen for the PoC implementation of the trace file compression support, as it is one of the widely available compression libraries. But as you have already seen, the design allows easily adding multiple compression algorithms. Support for a new compression library is less than 100 lines of code, that's why I suggest to keep the zlib support - even though the default will be zstd. > The zstd support is using the context aware function so there is no need > to for the library to allocate one (including the memory) for every > invocation. This requires to be used in a single threaded environment or > the API needs to be extended to pass the context parameter. Good point, the current design is according to zlib - that's why there is no context. But I like the idea to have a library specific context and this should be added now, before the first release. > > In most cases the input buffer was 40KiB so it does not make sense to > use higher compression levels. Higher compression levels won't > significantly improve the compression ration given that the every 40KiB > block is independent. However higher compression levels will slow down > the compression process. > By default, the buffer is 10 system pages - so it could be larger on some systems. > The upper level stores 4 bytes compressed and decompressed size. In > order to not save the decompressed size twice, the library won't store > the store in each compressed block. This shrinks the frame header a > little. In theory the ZSTD-magic (4 bytes) could be stripped away but > that is little complicated and the 4 bytes shouldn't hurt. We could even > enable check summing to be sure that the compressed block wasn't > accidentally tampered. > > Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> > --- > Makefile | 12 ++++ > lib/trace-cmd/Makefile | 5 +- > lib/trace-cmd/include/trace-cmd-local.h | 4 ++ > lib/trace-cmd/trace-compress-zstd.c | 91 +++++++++++++++++++++++++ > lib/trace-cmd/trace-compress.c | 3 + > tracecmd/Makefile | 2 +- > 6 files changed, 115 insertions(+), 2 deletions(-) > create mode 100644 lib/trace-cmd/trace-compress-zstd.c > > diff --git a/Makefile b/Makefile > index f5c2cdb894f9a..109cbeb29002a 100644 > --- a/Makefile > +++ b/Makefile > @@ -307,6 +307,18 @@ CFLAGS += -DHAVE_ZLIB > $(info Have zlib compression support) > endif > > +TEST_LIBZSTD = $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y") > + > +ifeq ("$(TEST_LIBZSTD)", "y") > +LIBZSTD_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags libzstd") > +LIBZSTD_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs libzstd") > +CFLAGS += -DHAVE_ZSTD > +ZSTD_INSTALLED=1 > +$(info Have ZSTD compression support) > +endif > + > +export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED > + > CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi) > export CUNIT_INSTALLED > > diff --git a/lib/trace-cmd/Makefile b/lib/trace-cmd/Makefile > index 1820c67b48474..da0ad4deeb4f0 100644 > --- a/lib/trace-cmd/Makefile > +++ b/lib/trace-cmd/Makefile > @@ -29,6 +29,9 @@ OBJS += trace-compress.o > ifeq ($(ZLIB_INSTALLED), 1) > OBJS += trace-compress-zlib.o > endif > +ifeq ($(ZSTD_INSTALLED), 1) > +OBJS += trace-compress-zstd.o > +endif > > # Additional util objects > OBJS += trace-blk-hack.o > @@ -48,7 +51,7 @@ $(DEPS): | $(bdir) > $(LIBTRACECMD_STATIC): $(OBJS) > $(Q)$(call do_build_static_lib) > > -LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) -lpthread > +LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(LIBZSTD_LDLAGS) -lpthread > > ifeq ($(ZLIB_INSTALLED), 1) > LIBS += -lz > diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h > index 48f179d6f524a..8601a15f86f22 100644 > --- a/lib/trace-cmd/include/trace-cmd-local.h > +++ b/lib/trace-cmd/include/trace-cmd-local.h > @@ -30,6 +30,10 @@ void tracecmd_info(const char *fmt, ...); > int tracecmd_zlib_init(void); > #endif > > +#ifdef HAVE_ZLIB I think you mean HAVE_ZSTD here. > +int tracecmd_zstd_init(void); > +#endif > + > struct data_file_write { > unsigned long long file_size; > unsigned long long write_size; > diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c > new file mode 100644 > index 0000000000000..fc5e350f32509 > --- /dev/null > +++ b/lib/trace-cmd/trace-compress-zstd.c > @@ -0,0 +1,91 @@ > +// SPDX-License-Identifier: LGPL-2.1 > +/* > + * Copyright (C) 2022, Sebastian Andrzej Siewior <sebastian@breakpoint.cc> > + * > + */ > +#include <stdlib.h> > +#include <zstd.h> > +#include <errno.h> > + > +#include "trace-cmd-private.h" > + > +#define __ZSTD_NAME "zstd" > +#define __ZSTD_WEIGTH 5 > + > +static ZSTD_CCtx *ctx_c; > +static ZSTD_DCtx *ctx_d; > + > +static int zstd_compress(const char *in, unsigned int in_bytes, > + char *out, unsigned int *out_bytes) > +{ > + size_t ret; > + > + ret = ZSTD_compress2(ctx_c, out, *out_bytes, in, in_bytes); > + if (ZSTD_isError(ret)) > + return -1; > + *out_bytes = ret; > + return 0; > +} > + > +static int zstd_decompress(const char *in, unsigned int in_bytes, > + char *out, unsigned int *out_bytes) > +{ > + size_t ret; > + > + ret = ZSTD_decompressDCtx(ctx_d, out, *out_bytes, in, in_bytes); > + if (ZSTD_isError(ret)) { > + errno = -EINVAL; > + return -1; > + } > + *out_bytes = ret; > + errno = 0; > + return 0; > +} > + > +static unsigned int zstd_compress_bound(unsigned int in_bytes) > +{ > + return ZSTD_compressBound(in_bytes); > +} > + > +static bool zstd_is_supported(const char *name, const char *version) > +{ > + if (!name) > + return false; > + if (strcmp(name, __ZSTD_NAME)) > + return false; > + > + return true; > +} > + > +int tracecmd_zstd_init(void) > +{ > + int ret = 0; > + size_t r; > + > + ctx_c = ZSTD_createCCtx(); > + ctx_d = ZSTD_createDCtx(); > + if (!ctx_c || !ctx_d) > + goto err; > + > + r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0); > + if (ZSTD_isError(r)) > + goto err; > + > + ret = tracecmd_compress_proto_register(__ZSTD_NAME, > + ZSTD_versionString(), > + __ZSTD_WEIGTH, > + zstd_compress, > + zstd_decompress, > + zstd_compress_bound, > + zstd_is_supported); I think tracecmd_compress_proto_register() should be extend with two new hooks: void *(*new_context)(void), void (*free_context)(void *), and ctx_c, ctx_d should be allocated and freed there. Currently, that logic in trace-cmd is in a single thread. That may change in the future, and then the current design will be a limitation. > + if (!ret) > + return 0; > +err: > + ZSTD_freeCCtx(ctx_c); > + ZSTD_freeDCtx(ctx_d); > + ctx_c = NULL; > + ctx_d = NULL; > + if (ret < 0) > + return ret; > + return -1; > +} > diff --git a/lib/trace-cmd/trace-compress.c b/lib/trace-cmd/trace-compress.c > index 210d58b602577..a14155a321f45 100644 > --- a/lib/trace-cmd/trace-compress.c > +++ b/lib/trace-cmd/trace-compress.c > @@ -390,6 +390,9 @@ void tracecmd_compress_init(void) > #ifdef HAVE_ZLIB > tracecmd_zlib_init(); > #endif > +#ifdef HAVE_ZSTD > + tracecmd_zstd_init(); > +#endif > } > > static struct compress_proto *compress_proto_select(void) > diff --git a/tracecmd/Makefile b/tracecmd/Makefile > index 56742f0afa2f8..355f04723ad7c 100644 > --- a/tracecmd/Makefile > +++ b/tracecmd/Makefile > @@ -49,7 +49,7 @@ all_objs := $(sort $(ALL_OBJS)) > all_deps := $(all_objs:$(bdir)/%.o=$(bdir)/.%.d) > > CONFIG_INCLUDES = > -CONFIG_LIBS = -lrt -lpthread $(TRACE_LIBS) > +CONFIG_LIBS = -lrt -lpthread $(TRACE_LIBS) $(LIBZSTD_LDLAGS) > CONFIG_FLAGS = > > ifeq ($(ZLIB_INSTALLED), 1) > -- > 2.35.1 >
On 2022-02-21 08:08:51 [+0200], Tzvetomir Stoyanov wrote: > On Sun, Feb 20, 2022 at 1:02 AM Sebastian Andrzej Siewior > <sebastian@breakpoint.cc> wrote: > > > > Hi Sebastian, Hi Tzvetomir, > Thank you for contributing zstd support for trace-cmd! The zlib was > chosen for the PoC implementation of the trace file compression > support, as it is one of the widely available compression libraries. > But as you have already seen, the design allows easily adding multiple > compression algorithms. Support for a new compression library is less > than 100 lines of code, that's why I suggest to keep the zlib support > - even though the default will be zstd. Oki. > > The zstd support is using the context aware function so there is no need > > to for the library to allocate one (including the memory) for every > > invocation. This requires to be used in a single threaded environment or > > the API needs to be extended to pass the context parameter. > > Good point, the current design is according to zlib - that's why there > is no context. But I like the idea to have a library specific context > and this should be added now, before the first release. cool. > > > > In most cases the input buffer was 40KiB so it does not make sense to > > use higher compression levels. Higher compression levels won't > > significantly improve the compression ration given that the every 40KiB > > block is independent. However higher compression levels will slow down > > the compression process. > > > > By default, the buffer is 10 system pages - so it could be larger on > some systems. Just a hint that if we increase the buffer size then the compression would benefit from it ;) > > diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h > > index 48f179d6f524a..8601a15f86f22 100644 > > --- a/lib/trace-cmd/include/trace-cmd-local.h > > +++ b/lib/trace-cmd/include/trace-cmd-local.h > > @@ -30,6 +30,10 @@ void tracecmd_info(const char *fmt, ...); > > int tracecmd_zlib_init(void); > > #endif > > > > +#ifdef HAVE_ZLIB > > I think you mean HAVE_ZSTD here. Yeah, fixed in v2, thanks. > > diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c > > new file mode 100644 > > index 0000000000000..fc5e350f32509 > > --- /dev/null > > +++ b/lib/trace-cmd/trace-compress-zstd.c … > > +int tracecmd_zstd_init(void) > > +{ > > + int ret = 0; > > + size_t r; > > + > > + ctx_c = ZSTD_createCCtx(); > > + ctx_d = ZSTD_createDCtx(); > > + if (!ctx_c || !ctx_d) > > + goto err; > > + > > + r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0); > > + if (ZSTD_isError(r)) > > + goto err; > > + > > + ret = tracecmd_compress_proto_register(__ZSTD_NAME, > > + ZSTD_versionString(), > > + __ZSTD_WEIGTH, > > + zstd_compress, > > + zstd_decompress, > > + zstd_compress_bound, > > + zstd_is_supported); > > I think tracecmd_compress_proto_register() should be extend with two new hooks: > void *(*new_context)(void), > void (*free_context)(void *), > and ctx_c, ctx_d should be allocated and freed there. Currently, that > logic in trace-cmd is in a single thread. That may change in the > future, and then the current design will be a limitation. I would suggest to use struct tracecmd_compress_algo { const char *name, const char *version, int weight, func_t *compress; func_t *decompress; func_t *bound; func_t *supported; }; and then ret = tracecmd_compress_proto_register(&comp_algo); so that the function has less args and can be extended without touching every algo. As for func_t / calling convetion I would prefer something that is not that close to zlib. Like int comp(void *ctx, const void *src, signed int in_size, void *dst, signed int out_size); This clearly limits the sizes to 31 bit (with the 40kiB probably okay) and the return value can be either >= 0 returning the number of bytes produced or negative for an error. And please no errno touching ;) Sebastian
On Mon, Feb 21, 2022 at 3:55 PM Sebastian Andrzej Siewior <sebastian@breakpoint.cc> wrote: > [ ... ] > > > +int tracecmd_zstd_init(void) > > > +{ > > > + int ret = 0; > > > + size_t r; > > > + > > > + ctx_c = ZSTD_createCCtx(); > > > + ctx_d = ZSTD_createDCtx(); > > > + if (!ctx_c || !ctx_d) > > > + goto err; > > > + > > > + r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0); > > > + if (ZSTD_isError(r)) > > > + goto err; > > > + > > > + ret = tracecmd_compress_proto_register(__ZSTD_NAME, > > > + ZSTD_versionString(), > > > + __ZSTD_WEIGTH, > > > + zstd_compress, > > > + zstd_decompress, > > > + zstd_compress_bound, > > > + zstd_is_supported); > > > > I think tracecmd_compress_proto_register() should be extend with two new hooks: > > void *(*new_context)(void), > > void (*free_context)(void *), > > and ctx_c, ctx_d should be allocated and freed there. Currently, that > > logic in trace-cmd is in a single thread. That may change in the > > future, and then the current design will be a limitation. > > I would suggest to use > > struct tracecmd_compress_algo { > const char *name, > const char *version, > int weight, > func_t *compress; > func_t *decompress; > func_t *bound; > func_t *supported; > }; > > and then > ret = tracecmd_compress_proto_register(&comp_algo); > > so that the function has less args and can be extended without touching > every algo. We prefer to use explicit API arguments, to ensure that on API's change the old callers of the API will be updated. Using such structure is more flexible, but even though in most cases the change will be backward compatible - it could be hard to debug the legacy code in case the API's behaviour changes because of adding some new parameter in the structure. > > As for func_t / calling convetion I would prefer something that is not > that close to zlib. Like > > int comp(void *ctx, const void *src, signed int in_size, > void *dst, signed int out_size); > > This clearly limits the sizes to 31 bit (with the 40kiB probably okay) > and the return value can be either >= 0 returning the number of bytes > produced or negative for an error. And please no errno touching ;) > I like the idea to make these hooks more generic, not so close to a specific library. But there should be some error reporting mechanism, why not to use errno ? In the best case, the library itself should set the errno. But zlib has its own error codes, and according to its documentation not all of the APIs set errno in case of an error. That's why I added this explicit setting of errno in case of some Z_ERROR. Are you interested to submit a patch with these suggestions ? I also can do it, when Steven merges zstd support and before the next trace-cmd release. > Sebastian -- Tzvetomir (Ceco) Stoyanov VMware Open Source Technology Center
On Tue, 22 Feb 2022 05:42:56 +0200 Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote: > > > > > > I think tracecmd_compress_proto_register() should be extend with > > > two new hooks: void *(*new_context)(void), > > > void (*free_context)(void *), > > > and ctx_c, ctx_d should be allocated and freed there. Currently, > > > that logic in trace-cmd is in a single thread. That may change in > > > the future, and then the current design will be a limitation. > > > > I would suggest to use > > > > struct tracecmd_compress_algo { > > const char *name, > > const char *version, > > int weight, > > func_t *compress; > > func_t *decompress; > > func_t *bound; > > func_t *supported; > > }; > > > > and then > > ret = tracecmd_compress_proto_register(&comp_algo); > > > > so that the function has less args and can be extended without > > touching every algo. > > We prefer to use explicit API arguments, to ensure that on API's > change the old callers of the API will be updated. Using such > structure is more flexible, but even though in most cases the change > will be backward compatible - it could be hard to debug the legacy > code in case the API's behaviour changes because of adding some new > parameter in the structure. The way this is done in the kernel interface (aka. system calls) is to pass a structure along with the size. That is, the API will know which version of the API is being called by the size of the structure. Anything new will have to be appended to the structure, which will increase it. If the library receives a structure of a smaller size, but one that was once supported, it could use that information to just do the old method. In other words, only use the fields of a structure that fit into the size given. > > > > > As for func_t / calling convetion I would prefer something that is > > not that close to zlib. Like > > > > int comp(void *ctx, const void *src, signed int in_size, > > void *dst, signed int out_size); > > > > This clearly limits the sizes to 31 bit (with the 40kiB probably > > okay) and the return value can be either >= 0 returning the number > > of bytes produced or negative for an error. And please no errno > > touching ;) Oh, we are always touching errno ;-) > > I like the idea to make these hooks more generic, not so close to a > specific library. But there should be some error reporting mechanism, > why not to use errno ? In the best case, the library itself should set > the errno. But zlib has its own error codes, and according to its > documentation not all of the APIs set errno in case of an error. > That's why I added this explicit setting of errno in case of some > Z_ERROR. > > Are you interested to submit a patch with these suggestions ? I also > can do it, when Steven merges zstd support and before the next > trace-cmd release. I'm hoping to get everything merged this week, but probably will not release trace-cmd until next week. I'm still sorting out the libraries. But I'm hoping that they are almost done. -- Steve
On 2022-02-22 05:42:56 [+0200], Tzvetomir Stoyanov wrote: > We prefer to use explicit API arguments, to ensure that on API's > change the old callers of the API will be updated. Using such > structure is more flexible, but even though in most cases the change > will be backward compatible - it could be hard to debug the legacy > code in case the API's behaviour changes because of adding some new > parameter in the structure. It depends. xz, openssl, … have ABIs and structs which change. I assumed that struct is internal to trace-cmd. But even if it isn't and it is public then it is part of lib-trace-cmd's ABI. Once you change here something you need to incremebt your so number and this requires to recompile all downstream users aka a library transition. By using C99 initializers you ensure that removed / replaced struct members don't remain used. > > As for func_t / calling convetion I would prefer something that is not > > that close to zlib. Like > > > > int comp(void *ctx, const void *src, signed int in_size, > > void *dst, signed int out_size); > > > > This clearly limits the sizes to 31 bit (with the 40kiB probably okay) > > and the return value can be either >= 0 returning the number of bytes > > produced or negative for an error. And please no errno touching ;) > > > > I like the idea to make these hooks more generic, not so close to a > specific library. You reassmeble the zlib thing. Here you have comp/decomp functions with pointer and size. And instead updating dst' size you have a return value defined as >= 0: decompressed / compressed to < 0: error number defined by trace-cmd > But there should be some error reporting mechanism, > why not to use errno ? In the best case, the library itself should set > the errno. But why touching errno? errno is defined / used by the OS. Based on the architecture the kernel returns two values. If sys_open succeeds then it returns the file handle. On failure -1 and errno is set the actual error. Some archiectures use two registers (one -1 and the other positive errno) for that some use one (positive or negative errno). The C-library then knows what the architecture does and ensures the API stays the same. A library like yours should not touch errno. If you encounter an error, you should define your own error numbers and use those. So your compression library detects a checksum error during decompression what do you do? EINVAL because the input is invalid? EBADMSG because the input is a bad message? But it is defined as "Not a data message" so maybe not. Maybe EILSEQ becase the data sequence is invalid. So you basically need to guess and pick something that close to what you have. While you could define your own codes and return those. > But zlib has its own error codes, and according to its > documentation not all of the APIs set errno in case of an error. > That's why I added this explicit setting of errno in case of some > Z_ERROR. But I though you wanted to somethign generic and be close a specifc library :) zlib is not a good example in terms of an API. No matter if you look zstd or xz _or_ something else recent (as in not from 80s). Both the compression libs I mentioned perform explicit framing and you as the user know when something ends. zlib is a little difficult sometimes. You do have the luxury that you know your input. I remember rsync does some things to be sure if there is an error or the input is actual over… > Are you interested to submit a patch with these suggestions ? I also > can do it, when Steven merges zstd support and before the next > trace-cmd release. Well if nobody objects, nobody does it first then maybe as I can't promise that I can make time before the weekend (and the weekend is currently full). Sebastian
diff --git a/Makefile b/Makefile index f5c2cdb894f9a..109cbeb29002a 100644 --- a/Makefile +++ b/Makefile @@ -307,6 +307,18 @@ CFLAGS += -DHAVE_ZLIB $(info Have zlib compression support) endif +TEST_LIBZSTD = $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y") + +ifeq ("$(TEST_LIBZSTD)", "y") +LIBZSTD_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags libzstd") +LIBZSTD_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs libzstd") +CFLAGS += -DHAVE_ZSTD +ZSTD_INSTALLED=1 +$(info Have ZSTD compression support) +endif + +export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED + CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi) export CUNIT_INSTALLED diff --git a/lib/trace-cmd/Makefile b/lib/trace-cmd/Makefile index 1820c67b48474..da0ad4deeb4f0 100644 --- a/lib/trace-cmd/Makefile +++ b/lib/trace-cmd/Makefile @@ -29,6 +29,9 @@ OBJS += trace-compress.o ifeq ($(ZLIB_INSTALLED), 1) OBJS += trace-compress-zlib.o endif +ifeq ($(ZSTD_INSTALLED), 1) +OBJS += trace-compress-zstd.o +endif # Additional util objects OBJS += trace-blk-hack.o @@ -48,7 +51,7 @@ $(DEPS): | $(bdir) $(LIBTRACECMD_STATIC): $(OBJS) $(Q)$(call do_build_static_lib) -LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) -lpthread +LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(LIBZSTD_LDLAGS) -lpthread ifeq ($(ZLIB_INSTALLED), 1) LIBS += -lz diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h index 48f179d6f524a..8601a15f86f22 100644 --- a/lib/trace-cmd/include/trace-cmd-local.h +++ b/lib/trace-cmd/include/trace-cmd-local.h @@ -30,6 +30,10 @@ void tracecmd_info(const char *fmt, ...); int tracecmd_zlib_init(void); #endif +#ifdef HAVE_ZLIB +int tracecmd_zstd_init(void); +#endif + struct data_file_write { unsigned long long file_size; unsigned long long write_size; diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c new file mode 100644 index 0000000000000..fc5e350f32509 --- /dev/null +++ b/lib/trace-cmd/trace-compress-zstd.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2022, Sebastian Andrzej Siewior <sebastian@breakpoint.cc> + * + */ +#include <stdlib.h> +#include <zstd.h> +#include <errno.h> + +#include "trace-cmd-private.h" + +#define __ZSTD_NAME "zstd" +#define __ZSTD_WEIGTH 5 + +static ZSTD_CCtx *ctx_c; +static ZSTD_DCtx *ctx_d; + +static int zstd_compress(const char *in, unsigned int in_bytes, + char *out, unsigned int *out_bytes) +{ + size_t ret; + + ret = ZSTD_compress2(ctx_c, out, *out_bytes, in, in_bytes); + if (ZSTD_isError(ret)) + return -1; + *out_bytes = ret; + return 0; +} + +static int zstd_decompress(const char *in, unsigned int in_bytes, + char *out, unsigned int *out_bytes) +{ + size_t ret; + + ret = ZSTD_decompressDCtx(ctx_d, out, *out_bytes, in, in_bytes); + if (ZSTD_isError(ret)) { + errno = -EINVAL; + return -1; + } + *out_bytes = ret; + errno = 0; + return 0; +} + +static unsigned int zstd_compress_bound(unsigned int in_bytes) +{ + return ZSTD_compressBound(in_bytes); +} + +static bool zstd_is_supported(const char *name, const char *version) +{ + if (!name) + return false; + if (strcmp(name, __ZSTD_NAME)) + return false; + + return true; +} + +int tracecmd_zstd_init(void) +{ + int ret = 0; + size_t r; + + ctx_c = ZSTD_createCCtx(); + ctx_d = ZSTD_createDCtx(); + if (!ctx_c || !ctx_d) + goto err; + + r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0); + if (ZSTD_isError(r)) + goto err; + + ret = tracecmd_compress_proto_register(__ZSTD_NAME, + ZSTD_versionString(), + __ZSTD_WEIGTH, + zstd_compress, + zstd_decompress, + zstd_compress_bound, + zstd_is_supported); + if (!ret) + return 0; +err: + ZSTD_freeCCtx(ctx_c); + ZSTD_freeDCtx(ctx_d); + ctx_c = NULL; + ctx_d = NULL; + if (ret < 0) + return ret; + return -1; +} diff --git a/lib/trace-cmd/trace-compress.c b/lib/trace-cmd/trace-compress.c index 210d58b602577..a14155a321f45 100644 --- a/lib/trace-cmd/trace-compress.c +++ b/lib/trace-cmd/trace-compress.c @@ -390,6 +390,9 @@ void tracecmd_compress_init(void) #ifdef HAVE_ZLIB tracecmd_zlib_init(); #endif +#ifdef HAVE_ZSTD + tracecmd_zstd_init(); +#endif } static struct compress_proto *compress_proto_select(void) diff --git a/tracecmd/Makefile b/tracecmd/Makefile index 56742f0afa2f8..355f04723ad7c 100644 --- a/tracecmd/Makefile +++ b/tracecmd/Makefile @@ -49,7 +49,7 @@ all_objs := $(sort $(ALL_OBJS)) all_deps := $(all_objs:$(bdir)/%.o=$(bdir)/.%.d) CONFIG_INCLUDES = -CONFIG_LIBS = -lrt -lpthread $(TRACE_LIBS) +CONFIG_LIBS = -lrt -lpthread $(TRACE_LIBS) $(LIBZSTD_LDLAGS) CONFIG_FLAGS = ifeq ($(ZLIB_INSTALLED), 1)
The zstd support is using the context aware function so there is no need to for the library to allocate one (including the memory) for every invocation. This requires to be used in a single threaded environment or the API needs to be extended to pass the context parameter. In most cases the input buffer was 40KiB so it does not make sense to use higher compression levels. Higher compression levels won't significantly improve the compression ration given that the every 40KiB block is independent. However higher compression levels will slow down the compression process. The upper level stores 4 bytes compressed and decompressed size. In order to not save the decompressed size twice, the library won't store the store in each compressed block. This shrinks the frame header a little. In theory the ZSTD-magic (4 bytes) could be stripped away but that is little complicated and the 4 bytes shouldn't hurt. We could even enable check summing to be sure that the compressed block wasn't accidentally tampered. Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> --- Makefile | 12 ++++ lib/trace-cmd/Makefile | 5 +- lib/trace-cmd/include/trace-cmd-local.h | 4 ++ lib/trace-cmd/trace-compress-zstd.c | 91 +++++++++++++++++++++++++ lib/trace-cmd/trace-compress.c | 3 + tracecmd/Makefile | 2 +- 6 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 lib/trace-cmd/trace-compress-zstd.c