Message ID | 1473349947-11952-1-git-send-email-jdanis@android.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
On 09/08/2016 11:52 AM, Janis Danisevskis wrote: > From: Janis Danisevskis <jdanis@google.com> > > This patch moves all pcre1/2 dependencies into the new files regex.h > and regex.c implementing the common denominator of features needed > by libselinux. The compiler flag -DUSE_PCRE2 toggles between the > used implementations. > > As of this patch libselinux supports either pcre or pcre2 but not > both at the same time. The persistently stored file contexts > information differs. This means libselinux can only load file > context files generated by sefcontext_compile build with the > same pcre variant. > > Also, for pcre2 the persistent format is architecture dependent. > Stored precompiled regular expressions can only be used on the > same architecture they were generated on. If pcre2 is used and > sefcontext_compile shall generate portable output, it and libselinux > must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the > cost of having to recompile the regular expressions at load time. > > Signed-off-by: Janis Danisevskis <jdanis@google.com> > > This patch includes includes: > > libselinux: fix memory leak on pcre2 > > Introduced a malloc on pcre_version(). Libselinux > expected this to be static, just use a static > internal buffer. 
> > Signed-off-by: William Roberts <william.c.roberts@intel.com> > --- > libselinux/Makefile | 13 + > libselinux/src/Makefile | 4 +- > libselinux/src/label_file.c | 93 ++----- > libselinux/src/label_file.h | 59 ++--- > libselinux/src/regex.c | 461 ++++++++++++++++++++++++++++++++++ > libselinux/src/regex.h | 169 +++++++++++++ > libselinux/utils/Makefile | 4 +- > libselinux/utils/sefcontext_compile.c | 55 +--- > 8 files changed, 697 insertions(+), 161 deletions(-) > create mode 100644 libselinux/src/regex.c > create mode 100644 libselinux/src/regex.h > > diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile > index 37d01af..66687e6 100644 > --- a/libselinux/src/Makefile > +++ b/libselinux/src/Makefile > @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi > -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \ > -Werror -Wno-aggregate-return -Wno-redundant-decls > > -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) > +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS) > > SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \ > -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations > @@ -113,7 +113,7 @@ $(LIBA): $(OBJS) > $(RANLIB) $@ > > $(LIBSO): $(LOBJS) > - $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro > + $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro > ln -sf $@ $(TARGET) > > $(LIBPC): $(LIBPC).in ../VERSION We want make to still work in this subdirectory, so we need defaults assigned to PCRE_CFLAGS and PCRE_LDFLAGS if not set by the caller. 
> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h > index 6d1e890..24cb9e0 100644 > --- a/libselinux/src/label_file.h > +++ b/libselinux/src/label_file.h > @@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec, > struct saved_data *data = (struct saved_data *)rec->data; > struct spec *spec_arr; > unsigned int nspec = data->nspec; > - const char *errbuf = NULL; > + char const *errbuf; Unnecessary change? > + struct regex_error_data error_data; > > items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context); > if (items < 0) { > @@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec, > data->nspec++; > > if (rec->validating && > - compile_regex(data, &spec_arr[nspec], &errbuf)) { > + compile_regex(data, &spec_arr[nspec], &error_data)) { Same bug as before - errbuf was being used in the following log call, so you need to change it to use regex_format_error(). > COMPAT_LOG(SELINUX_ERROR, > "%s: line %u has invalid regex %s: %s\n", > path, lineno, regex, > diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c > new file mode 100644 > index 0000000..558a72a > --- /dev/null > +++ b/libselinux/src/regex.c > @@ -0,0 +1,461 @@ > +#include <assert.h> > +#include <stdint.h> > +#include <stdio.h> > +#include <string.h> > + > +#include "regex.h" > +#include "label_file.h" > + > +#ifdef USE_PCRE2 > +int regex_prepare_data(struct regex_data ** regex, > + char const * pattern_string, > + struct regex_error_data * errordata) { All of the non-static functions in this file need to be marked with "hidden" so that they are not part of the shared library ABI. Preferred coding style is Linux kernel, so opening bracket goes on a line by itself for functions. 
> + > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { > + int rc; > + size_t entry_len; As before, this needs to be uint32_t or it will be the wrong size on 64-bit systems and the subsequent next_entry() call won't fully initialize it. > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0) > + return -1; > + > + if (entry_len) { > + /* > + * this should yield exactly one because we store one pattern at a time > + */ > + rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr); > + if (rc != 1) > + return -1; > + > + *regex = regex_data_create(); > + if (!*regex) > + return -1; > + > + rc = pcre2_serialize_decode(&(*regex)->regex, 1, > + (PCRE2_SPTR)mmap_area->next_addr, NULL); > + if (rc != 1) > + goto err; > + > + (*regex)->match_data = > + pcre2_match_data_create_from_pattern((*regex)->regex, NULL); > + if (!(*regex)->match_data) > + goto err; > + } > + > + /* and skip the decoded bit */ > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + return 0; > +err: > + regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +int regex_writef(struct regex_data * regex, FILE * fp) { > + int rc = 0; > + size_t len; > + PCRE2_SIZE to_write; > + PCRE2_UCHAR * bytes = NULL; > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS > + int do_write_patterns = 1; > +#else > + int do_write_patterns = 0; > +#endif > + > + if (do_write_patterns) { > + /* encode the patter for serialization */ > + rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1, > + &bytes, &to_write, NULL); > + if (rc != 1) { > + rc = -1; > + goto err; > + } > + } else { > + to_write = 0; > + } > + > + /* write serialized pattern's size */ > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); We need a uint32_t variable here, not PCRE2_SIZE which may be larger. 
> + if (len != 1) { > + rc = -1; > + goto err; > + } > + > + if (do_write_patterns) { > + /* write serialized pattern */ > + len = fwrite(bytes, 1, to_write, fp); > + if (len != to_write) { > + rc = -1; > + } In general you don't need { } for a single statement except where it might otherwise be hard to read (e.g. helpful for the else statement above, but not really here or in several other cases below). > + } > + > +err: > + if (bytes) > + pcre2_serialize_free(bytes); > + > + return rc; > +} > + > +void regex_data_free(struct regex_data * regex) { > + if (regex) { > + if (regex->regex) { > + pcre2_code_free(regex->regex); > + } > + if (regex->match_data) { > + pcre2_match_data_free(regex->match_data); > + } > + free(regex); > + } > +} > + > +int regex_match(struct regex_data * regex, char const * subject, int partial) { > + int rc; > + rc = pcre2_match(regex->regex, > + (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, > + partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, > + NULL); > + if (rc > 0) > + return REGEX_MATCH; Indentation problem. > + switch (rc) { > + case PCRE2_ERROR_PARTIAL: > + return REGEX_MATCH_PARTIAL; > + case PCRE2_ERROR_NOMATCH: > + return REGEX_NO_MATCH; > + default: > + return REGEX_ERROR; > + } > +} > + > +/* > + * TODO Replace this compare function with something that actually compares the > + * regular expressions. > + * This compare function basically just compares the binary representations of > + * the automatons, and because this representation contains pointers and > + * metadata, it can only return a match if regex1 == regex2. > + * Preferably, this function would be replaced with an algorithm that computes > + * the equivalence of the automatons systematically. 
> + */ > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { > + int rc; > + size_t len1, len2; > + rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); > + assert(rc == 0); > + rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); > + assert(rc == 0); > + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) > + return SELABEL_INCOMPARABLE; > + > + return SELABEL_EQUAL; > +} > + > +#else // !USE_PCRE2 > + > +int regex_prepare_data(struct regex_data ** regex, > + char const * pattern_string, > + struct regex_error_data * errordata) { > + memset(errordata, 0, sizeof(struct regex_error_data)); > + > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL, > + &errordata->error_buffer, > + &errordata->error_offset, NULL); > + if (!(*regex)->regex) { > + goto err; > + } > + (*regex)->owned = 1; > + > + (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer); > + if (!(*regex)->sd && errordata->error_buffer) { > + goto err; > + } > + > + return 0; > + > +err: regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +char const * regex_version(void) { > + return pcre_version(); > +} > + > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { > + int rc; > + size_t entry_len, info_len; Same as before: entry_len needs to be uint32_t. > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0 || !entry_len) { > + return -1; > + } > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->owned = 0; > + (*regex)->regex = (pcre *) mmap_area->next_addr; > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + /* > + * Check that regex lengths match. pcre_fullinfo() > + * also validates its magic number. 
> + */ > + rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); > + if (rc < 0 || info_len != entry_len) { > + goto err; > + } > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0 || !entry_len) { > + goto err; > + } > + > + if (entry_len) { > + (*regex)->lsd.study_data = (void *) mmap_area->next_addr; > + (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + /* Check that study data lengths match. */ > + rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, > + PCRE_INFO_STUDYSIZE, &info_len); > + if (rc < 0 || info_len != entry_len) > + goto err; > + } > + return 0; > + > +err: > + regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +int regex_writef(struct regex_data * regex, FILE * fp) { > + int rc; > + size_t len; > + uint32_t to_write; > + size_t size; > + pcre_extra * sd = regex->owned ? regex->sd : > + (regex->lsd.study_data ? &regex->lsd : NULL); Recommend defining and using an inline function for the above and the similar, inconsistent case in regex_match() below. Prior to this patch, the equivalent was get_pcre_extra(). 
> + > + /* determine the size of the pcre data in bytes */ > + rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); > + if (rc < 0) > + return -1; > + > + /* write the number of bytes in the pcre data */ > + to_write = size; > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) > + return -1; > + > + /* write the actual pcre data as a char array */ > + len = fwrite(regex->regex, 1, to_write, fp); > + if (len != to_write) > + return -1; > + > + if (sd) { > + /* determine the size of the pcre study info */ > + rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, > + &size); > + if (rc < 0) > + return -1; > + } else > + size = 0; > + > + /* write the number of bytes in the pcre study data */ > + to_write = size; > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) > + return -1; > + > + if (sd) { > + /* write the actual pcre study data as a char array */ > + len = fwrite(sd->study_data, 1, to_write, fp); > + if (len != to_write) > + return -1; > + } > + > + return 0; > +} > + > +void regex_data_free(struct regex_data * regex) { > + if (regex) { > + if (regex->owned) { > + if (regex->regex) > + pcre_free(regex->regex); > + if (regex->sd) > + pcre_free_study(regex->sd); > + } > + free(regex); > + } > +} > + > +int regex_match(struct regex_data * regex, char const * subject, int partial) { > + int rc; > + > + rc = pcre_exec(regex->regex, regex->owned ? regex->sd : &regex->lsd, get_pcre_extra() > + subject, strlen(subject), 0, > + partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0); > + switch (rc) { > + case 0: > + return REGEX_MATCH; > + case PCRE_ERROR_PARTIAL: > + return REGEX_MATCH_PARTIAL; > + case PCRE_ERROR_NOMATCH: > + return REGEX_NO_MATCH; > + default: > + return REGEX_ERROR; > + } > +} > + > +/* > + * TODO Replace this compare function with something that actually compares the > + * regular expressions. 
> + * This compare function basically just compares the binary representations of > + * the automatons, and because this representation contains pointers and > + * metadata, it can only return a match if regex1 == regex2. > + * Preferably, this function would be replaced with an algorithm that computes > + * the equivalence of the automatons systematically. > + */ > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { > + int rc; > + size_t len1, len2; > + rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); > + assert(rc == 0); > + rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); > + assert(rc == 0); > + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) > + return SELABEL_INCOMPARABLE; > + > + return SELABEL_EQUAL; > +} > + > +#endif > + > + > +struct regex_data * regex_data_create(void) { > + struct regex_data * dummy = (struct regex_data*) malloc( > + sizeof(struct regex_data)); > + if (dummy) { > + memset(dummy, 0, sizeof(struct regex_data)); > + } Just use calloc()? > + return dummy; > +} > +
> -----Original Message----- > From: Janis Danisevskis [mailto:jdanis@android.com] > Sent: Thursday, September 8, 2016 8:52 AM > To: selinux@tycho.nsa.gov; seandroid-list@tycho.nsa.gov; sds@tycho.nsa.gov; > jwcart2@tycho.nsa.gov > Cc: Janis Danisevskis <jdanis@google.com>; Roberts, William C > <william.c.roberts@intel.com> > Subject: [PATCH] libselinux: add support for pcre2 > > From: Janis Danisevskis <jdanis@google.com> > > This patch moves all pcre1/2 dependencies into the new files regex.h and regex.c > implementing the common denominator of features needed by libselinux. The > compiler flag -DUSE_PCRE2 toggles between the used implementations. > > As of this patch libselinux supports either pcre or pcre2 but not both at the same > time. The persistently stored file contexts information differs. This means > libselinux can only load file context files generated by sefcontext_compile build > with the same pcre variant. > > Also, for pcre2 the persistent format is architecture dependent. > Stored precompiled regular expressions can only be used on the same > architecture they were generated on. If pcre2 is used and sefcontext_compile > shall generate portable output, it and libselinux must be compiled with - > DNO_PERSISTENTLY_STORED_PATTERNS, at the cost of having to recompile the > regular expressions at load time. > > Signed-off-by: Janis Danisevskis <jdanis@google.com> > > This patch includes includes: Typo: "includes" is doubled. > > libselinux: fix memory leak on pcre2 > > Introduced a malloc on pcre_version(). Libselinux expected this to be static, just > use a static internal buffer. > > Signed-off-by: William Roberts <william.c.roberts@intel.com> You can remove any attribution since it's squashed down; this really doesn't apply, so don't feel obligated to include any of this information. 
> --- > libselinux/Makefile | 13 + > libselinux/src/Makefile | 4 +- > libselinux/src/label_file.c | 93 ++----- > libselinux/src/label_file.h | 59 ++--- > libselinux/src/regex.c | 461 ++++++++++++++++++++++++++++++++++ > libselinux/src/regex.h | 169 +++++++++++++ > libselinux/utils/Makefile | 4 +- > libselinux/utils/sefcontext_compile.c | 55 +--- > 8 files changed, 697 insertions(+), 161 deletions(-) create mode 100644 > libselinux/src/regex.c create mode 100644 libselinux/src/regex.h > > diff --git a/libselinux/Makefile b/libselinux/Makefile index 6142b60..15d051e > 100644 > --- a/libselinux/Makefile > +++ b/libselinux/Makefile > @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y) endif export DISABLE_AVC > DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS > > +USE_PCRE2 ?= n > +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n ifeq ($(USE_PCRE2),y) > + PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8 > + ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y) > + PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS > + endif > + PCRE_LDFLAGS := -lpcre2-8 > +else > + PCRE_LDFLAGS := -lpcre > +endif > +export PCRE_CFLAGS PCRE_LDFLAGS > + > all install relabel clean distclean indent: > @for subdir in $(SUBDIRS); do \ > (cd $$subdir && $(MAKE) $@) || exit 1; \ diff --git > a/libselinux/src/Makefile b/libselinux/src/Makefile index 37d01af..66687e6 > 100644 > --- a/libselinux/src/Makefile > +++ b/libselinux/src/Makefile > @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat- > security -Winit-self -Wmissi > -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest- > attribute=const \ > -Werror -Wno-aggregate-return -Wno-redundant-decls > > -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) > +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE > +$(EMFLAGS) $(PCRE_CFLAGS) > > SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set- > variable -Wno-unused-parameter \ > -Wno-shadow -Wno-uninitialized 
-Wno-missing-prototypes - > Wno-missing-declarations @@ -113,7 +113,7 @@ $(LIBA): $(OBJS) > $(RANLIB) $@ > > $(LIBSO): $(LOBJS) > - $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,- > soname,$(LIBSO),-z,defs,-z,relro > + $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) > +-L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro > ln -sf $@ $(TARGET) > > $(LIBPC): $(LIBPC).in ../VERSION > diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c index > c89bb35..e41c351 100644 > --- a/libselinux/src/label_file.c > +++ b/libselinux/src/label_file.c > @@ -15,7 +15,6 @@ > #include <errno.h> > #include <limits.h> > #include <stdint.h> > -#include <pcre.h> > #include <unistd.h> > #include <sys/mman.h> > #include <sys/types.h> > @@ -112,6 +111,7 @@ static int load_mmap(struct selabel_handle *rec, const > char *path, > struct mmap_area *mmap_area; > uint32_t i, magic, version; > uint32_t entry_len, stem_map_len, regex_array_len; > + const char *reg_version; > > if (isbinary) { > len = strlen(path); > @@ -175,8 +175,13 @@ static int load_mmap(struct selabel_handle *rec, const > char *path, > if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS) > return -1; > > + reg_version = regex_version(); > + if (!reg_version) > + return -1; > + > if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) { > - len = strlen(pcre_version()); > + > + len = strlen(reg_version); > > rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > if (rc < 0) > @@ -198,7 +203,7 @@ static int load_mmap(struct selabel_handle *rec, const > char *path, > } > > str_buf[entry_len] = '\0'; > - if ((strcmp(str_buf, pcre_version()) != 0)) { > + if ((strcmp(str_buf, reg_version) != 0)) { > free(str_buf); > return -1; > } > @@ -278,7 +283,6 @@ static int load_mmap(struct selabel_handle *rec, const > char *path, > > spec = &data->spec_arr[data->nspec]; > spec->from_mmap = 1; > - spec->regcomp = 1; > > /* Process context */ > rc = next_entry(&entry_len, mmap_area, 
sizeof(uint32_t)); @@ - > 364,47 +368,10 @@ static int load_mmap(struct selabel_handle *rec, const char > *path, > spec->prefix_len = prefix_len; > } > > - /* Process regex and study_data entries */ > - rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > - if (rc < 0 || !entry_len) { > - rc = -1; > - goto err; > - } > - spec->regex = (pcre *)mmap_area->next_addr; > - rc = next_entry(NULL, mmap_area, entry_len); > + rc = regex_load_mmap(mmap_area, &spec->regex); > if (rc < 0) > goto err; > > - /* Check that regex lengths match. pcre_fullinfo() > - * also validates its magic number. */ > - rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len); > - if (rc < 0 || len != entry_len) { > - rc = -1; > - goto err; > - } > - > - rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > - if (rc < 0 || !entry_len) { > - rc = -1; > - goto err; > - } > - > - if (entry_len) { > - spec->lsd.study_data = (void *)mmap_area->next_addr; > - spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA; > - rc = next_entry(NULL, mmap_area, entry_len); > - if (rc < 0) > - goto err; > - > - /* Check that study data lengths match. 
*/ > - rc = pcre_fullinfo(spec->regex, &spec->lsd, > - PCRE_INFO_STUDYSIZE, &len); > - if (rc < 0 || len != entry_len) { > - rc = -1; > - goto err; > - } > - } > - > data->nspec++; > } > > @@ -605,14 +572,11 @@ static void closef(struct selabel_handle *rec) > spec = &data->spec_arr[i]; > free(spec->lr.ctx_trans); > free(spec->lr.ctx_raw); > + regex_data_free(spec->regex); > if (spec->from_mmap) > continue; > free(spec->regex_str); > free(spec->type_str); > - if (spec->regcomp) { > - pcre_free(spec->regex); > - pcre_free_study(spec->sd); > - } > } > > for (i = 0; i < (unsigned int)data->num_stems; i++) { @@ -644,13 +608,14 > @@ static struct spec *lookup_common(struct selabel_handle *rec, { > struct saved_data *data = (struct saved_data *)rec->data; > struct spec *spec_arr = data->spec_arr; > - int i, rc, file_stem, pcre_options = 0; > + int i, rc, file_stem; > mode_t mode = (mode_t)type; > const char *buf; > struct spec *ret = NULL; > char *clean_key = NULL; > const char *prev_slash, *next_slash; > unsigned int sofar = 0; > + struct regex_error_data regex_error_data; > > if (!data->nspec) { > errno = ENOENT; > @@ -677,9 +642,6 @@ static struct spec *lookup_common(struct selabel_handle > *rec, > file_stem = find_stem_from_file(data, &buf); > mode &= S_IFMT; > > - if (partial) > - pcre_options |= PCRE_PARTIAL_SOFT; > - > /* > * Check for matching specifications in reverse order, so that > * the last matching specification is used. 
> @@ -692,25 +654,19 @@ static struct spec *lookup_common(struct > selabel_handle *rec, > * a regex check */ > if ((spec->stem_id == -1 || spec->stem_id == file_stem) && > (!mode || !spec->mode || mode == spec->mode)) { > - if (compile_regex(data, spec, NULL) < 0) > + if (compile_regex(data, spec, ®ex_error_data) < 0) > goto finish; > if (spec->stem_id == -1) > - rc = pcre_exec(spec->regex, > - get_pcre_extra(spec), > - key, strlen(key), 0, > - pcre_options, NULL, 0); > + rc = regex_match(spec->regex, key, partial); > else > - rc = pcre_exec(spec->regex, > - get_pcre_extra(spec), > - buf, strlen(buf), 0, > - pcre_options, NULL, 0); > - if (rc == 0) { > + rc = regex_match(spec->regex, buf, partial); > + if (rc == REGEX_MATCH) { > spec->matches++; > break; > - } else if (partial && rc == PCRE_ERROR_PARTIAL) > + } else if (partial && rc == REGEX_MATCH_PARTIAL) > break; > > - if (rc == PCRE_ERROR_NOMATCH) > + if (rc == REGEX_NO_MATCH) > continue; > > errno = ENOENT; > @@ -849,17 +805,10 @@ static enum selabel_cmp_result cmp(struct > selabel_handle *h1, > continue; > } > > - if (spec1->regcomp && spec2->regcomp) { > - size_t len1, len2; > - int rc; > - > - rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, > &len1); > - assert(rc == 0); > - rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, > &len2); > - assert(rc == 0); > - if (len1 != len2 || > - memcmp(spec1->regex, spec2->regex, len1)) > + if (spec1->regex && spec2->regex) { > + if (regex_cmp(spec1->regex, spec2->regex) == > SELABEL_INCOMPARABLE){ > return incomp(spec1, spec2, "regex", i, j); > + } > } else { > if (strcmp(spec1->regex_str, spec2->regex_str)) > return incomp(spec1, spec2, "regex_str", i, j); diff > --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h index > 6d1e890..24cb9e0 100644 > --- a/libselinux/src/label_file.h > +++ b/libselinux/src/label_file.h > @@ -6,6 +6,14 @@ > > #include <sys/stat.h> > > +/* > + * regex.h/c were introduced to hold all dependencies on the regular > + 
* expression back-end when we started supporting PCRE2. regex.h > +defines a > + * minimal interface required by libselinux, so that the remaining code > + * can be agnostic about the underlying implementation. > + */ > +#include "regex.h" > + > #include "callbacks.h" > #include "label_internal.h" > > @@ -19,26 +27,16 @@ > > #define SELINUX_COMPILED_FCONTEXT_MAX_VERS > SELINUX_COMPILED_FCONTEXT_PREFIX_LEN > > -/* Prior to version 8.20, libpcre did not have pcre_free_study() */ -#if > (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) -#define > pcre_free_study pcre_free -#endif > - > /* A file security context specification. */ struct spec { > struct selabel_lookup_rec lr; /* holds contexts for lookup result */ > char *regex_str; /* regular expession string for diagnostics */ > char *type_str; /* type string for diagnostic messages */ > - pcre *regex; /* compiled regular expression */ > - union { > - pcre_extra *sd; /* pointer to extra compiled stuff */ > - pcre_extra lsd; /* used to hold the mmap'd version */ > - }; > + struct regex_data * regex; /* backend dependent regular expression > +data */ > mode_t mode; /* mode format value */ > int matches; /* number of matching pathnames */ > int stem_id; /* indicates which stem-compression item */ > char hasMetaChars; /* regular expression has meta-chars */ > - char regcomp; /* regex_str has been compiled to regex */ > char from_mmap; /* this spec is from an mmap of the data > */ > size_t prefix_len; /* length of fixed path prefix */ > }; > @@ -78,17 +76,6 @@ struct saved_data { > struct mmap_area *mmap_areas; > }; > > -static inline pcre_extra *get_pcre_extra(struct spec *spec) -{ > - if (spec->from_mmap) { > - if (spec->lsd.study_data) > - return &spec->lsd; > - else > - return NULL; > - } else > - return spec->sd; > -} > - > static inline mode_t string_to_mode(char *mode) { > size_t len; > @@ -331,15 +318,14 @@ static inline int next_entry(void *buf, struct mmap_area > *fp, size_t bytes) } > > static inline int 
compile_regex(struct saved_data *data, struct spec *spec, > - const char **errbuf) > + struct regex_error_data * error_data) > { > - const char *tmperrbuf; > char *reg_buf, *anchored_regex, *cp; > struct stem *stem_arr = data->stem_arr; > size_t len; > - int erroff; > + int rc; > > - if (spec->regcomp) > + if (spec->regex) > return 0; /* already done */ > > /* Skip the fixed stem. */ > @@ -361,25 +347,13 @@ static inline int compile_regex(struct saved_data *data, > struct spec *spec, > *cp = '\0'; > > /* Compile the regular expression. */ > - spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, > &tmperrbuf, > - &erroff, NULL); > + rc = regex_prepare_data(&spec->regex, anchored_regex, error_data); > free(anchored_regex); > - if (!spec->regex) { > - if (errbuf) > - *errbuf = tmperrbuf; > - return -1; > - } > - > - spec->sd = pcre_study(spec->regex, 0, &tmperrbuf); > - if (!spec->sd && tmperrbuf) { > - if (errbuf) > - *errbuf = tmperrbuf; > + if (rc < 0) { > return -1; > } > > /* Done. 
*/ > - spec->regcomp = 1; > - > return 0; > } > > @@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec, > struct saved_data *data = (struct saved_data *)rec->data; > struct spec *spec_arr; > unsigned int nspec = data->nspec; > - const char *errbuf = NULL; > + char const *errbuf; > + struct regex_error_data error_data; > > items = read_spec_entries(line_buf, &errbuf, 3, ®ex, &type, > &context); > if (items < 0) { > @@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec, > data->nspec++; > > if (rec->validating && > - compile_regex(data, &spec_arr[nspec], &errbuf)) { > + compile_regex(data, &spec_arr[nspec], &error_data)) { > COMPAT_LOG(SELINUX_ERROR, > "%s: line %u has invalid regex %s: %s\n", > path, lineno, regex, > diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c new file mode 100644 > index 0000000..558a72a > --- /dev/null > +++ b/libselinux/src/regex.c > @@ -0,0 +1,461 @@ > +#include <assert.h> > +#include <stdint.h> > +#include <stdio.h> > +#include <string.h> > + > +#include "regex.h" > +#include "label_file.h" > + > +#ifdef USE_PCRE2 > +int regex_prepare_data(struct regex_data ** regex, > + char const * pattern_string, > + struct regex_error_data * errordata) { > + memset(errordata, 0, sizeof(struct regex_error_data)); > + > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string, > + PCRE2_ZERO_TERMINATED, > + PCRE2_DOTALL, > + &errordata->error_code, > + &errordata->error_offset, NULL); > + if (!(*regex)->regex) { > + goto err; > + } > + > + (*regex)->match_data = > + pcre2_match_data_create_from_pattern((*regex)->regex, > NULL); > + if (!(*regex)->match_data) { > + goto err; > + } > + return 0; > + > +err: regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +char const * regex_version(void) { > + static char version_buf[256]; > + size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL); > + if (len <= 0 || 
len > sizeof(version_buf)) > + return NULL; > + > + pcre2_config(PCRE2_CONFIG_VERSION, version_buf); > + return version_buf; > +} > + > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** > regex) { > + int rc; > + size_t entry_len; > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0) > + return -1; > + > + if (entry_len) { > + /* > + * this should yield exactly one because we store one pattern at a > time > + */ > + rc = pcre2_serialize_get_number_of_codes(mmap_area- > >next_addr); > + if (rc != 1) > + return -1; > + > + *regex = regex_data_create(); > + if (!*regex) > + return -1; > + > + rc = pcre2_serialize_decode(&(*regex)->regex, 1, > + (PCRE2_SPTR)mmap_area->next_addr, NULL); > + if (rc != 1) > + goto err; > + > + (*regex)->match_data = > + pcre2_match_data_create_from_pattern((*regex)- > >regex, NULL); > + if (!(*regex)->match_data) > + goto err; > + } > + > + /* and skip the decoded bit */ > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + return 0; > +err: > + regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +int regex_writef(struct regex_data * regex, FILE * fp) { > + int rc = 0; > + size_t len; > + PCRE2_SIZE to_write; > + PCRE2_UCHAR * bytes = NULL; > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS > + int do_write_patterns = 1; > +#else > + int do_write_patterns = 0; > +#endif > + > + if (do_write_patterns) { > + /* encode the patter for serialization */ > + rc = pcre2_serialize_encode((const pcre2_code **)®ex- > >regex, 1, > + &bytes, &to_write, NULL); > + if (rc != 1) { > + rc = -1; > + goto err; > + } > + } else { > + to_write = 0; > + } > + > + /* write serialized pattern's size */ > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) { > + rc = -1; > + goto err; > + } > + > + if (do_write_patterns) { > + /* write serialized pattern */ > + len = fwrite(bytes, 1, to_write, fp); > + if (len != to_write) { > + rc = -1; > + } > + } > + 
> +err: > + if (bytes) > + pcre2_serialize_free(bytes); > + > + return rc; > +} > + > +void regex_data_free(struct regex_data * regex) { > + if (regex) { > + if (regex->regex) { > + pcre2_code_free(regex->regex); > + } > + if (regex->match_data) { > + pcre2_match_data_free(regex->match_data); > + } > + free(regex); > + } > +} > + > +int regex_match(struct regex_data * regex, char const * subject, int partial) { > + int rc; > + rc = pcre2_match(regex->regex, > + (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, > + partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, > + NULL); > + if (rc > 0) > + return REGEX_MATCH; > + switch (rc) { > + case PCRE2_ERROR_PARTIAL: > + return REGEX_MATCH_PARTIAL; > + case PCRE2_ERROR_NOMATCH: > + return REGEX_NO_MATCH; > + default: > + return REGEX_ERROR; > + } > +} > + > +/* > + * TODO Replace this compare function with something that actually > +compares the > + * regular expressions. > + * This compare function basically just compares the binary > +representations of > + * the automatons, and because this representation contains pointers > +and > + * metadata, it can only return a match if regex1 == regex2. > + * Preferably, this function would be replaced with an algorithm that > +computes > + * the equivalence of the automatons systematically. 
> + */ > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { > + int rc; > + size_t len1, len2; > + rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); > + assert(rc == 0); > + rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); > + assert(rc == 0); > + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) > + return SELABEL_INCOMPARABLE; > + > + return SELABEL_EQUAL; > +} > + > +#else // !USE_PCRE2 > + > +int regex_prepare_data(struct regex_data ** regex, > + char const * pattern_string, > + struct regex_error_data * errordata) { > + memset(errordata, 0, sizeof(struct regex_error_data)); > + > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL, > + &errordata->error_buffer, > + &errordata->error_offset, NULL); > + if (!(*regex)->regex) { > + goto err; > + } > + (*regex)->owned = 1; > + > + (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata- > >error_buffer); > + if (!(*regex)->sd && errordata->error_buffer) { > + goto err; > + } > + > + return 0; > + > +err: regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +char const * regex_version(void) { > + return pcre_version(); > +} > + > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** > regex) { > + int rc; > + size_t entry_len, info_len; > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0 || !entry_len) { > + return -1; > + } > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->owned = 0; > + (*regex)->regex = (pcre *) mmap_area->next_addr; > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + /* > + * Check that regex lengths match. pcre_fullinfo() > + * also validates its magic number. 
> + */ > + rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); > + if (rc < 0 || info_len != entry_len) { > + goto err; > + } > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0 || !entry_len) { > + goto err; > + } > + > + if (entry_len) { > + (*regex)->lsd.study_data = (void *) mmap_area->next_addr; > + (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + /* Check that study data lengths match. */ > + rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, > + PCRE_INFO_STUDYSIZE, &info_len); > + if (rc < 0 || info_len != entry_len) > + goto err; > + } > + return 0; > + > +err: > + regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +int regex_writef(struct regex_data * regex, FILE * fp) { > + int rc; > + size_t len; > + uint32_t to_write; > + size_t size; > + pcre_extra * sd = regex->owned ? regex->sd : > + (regex->lsd.study_data ? &regex->lsd : NULL); > + > + /* determine the size of the pcre data in bytes */ > + rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); > + if (rc < 0) > + return -1; > + > + /* write the number of bytes in the pcre data */ > + to_write = size; > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) > + return -1; > + > + /* write the actual pcre data as a char array */ > + len = fwrite(regex->regex, 1, to_write, fp); > + if (len != to_write) > + return -1; > + > + if (sd) { > + /* determine the size of the pcre study info */ > + rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, > + &size); > + if (rc < 0) > + return -1; > + } else > + size = 0; > + > + /* write the number of bytes in the pcre study data */ > + to_write = size; > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) > + return -1; > + > + if (sd) { > + /* write the actual pcre study data as a char array */ > + len = fwrite(sd->study_data, 1, to_write, fp); > + if (len != to_write) >
+ return -1; > + } > + > + return 0; > +} > + > +void regex_data_free(struct regex_data * regex) { > + if (regex) { > + if (regex->owned) { > + if (regex->regex) > + pcre_free(regex->regex); > + if (regex->sd) > + pcre_free_study(regex->sd); > + } > + free(regex); > + } > +} > + > +int regex_match(struct regex_data * regex, char const * subject, int partial) { > + int rc; > + > + rc = pcre_exec(regex->regex, regex->owned ? regex->sd : &regex->lsd, > + subject, strlen(subject), 0, > + partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0); > + switch (rc) { > + case 0: > + return REGEX_MATCH; > + case PCRE_ERROR_PARTIAL: > + return REGEX_MATCH_PARTIAL; > + case PCRE_ERROR_NOMATCH: > + return REGEX_NO_MATCH; > + default: > + return REGEX_ERROR; > + } > +} > + > +/* > + * TODO Replace this compare function with something that actually > +compares the > + * regular expressions. > + * This compare function basically just compares the binary > +representations of > + * the automatons, and because this representation contains pointers > +and > + * metadata, it can only return a match if regex1 == regex2. > + * Preferably, this function would be replaced with an algorithm that > +computes > + * the equivalence of the automatons systematically.
> + */ > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { > + int rc; > + size_t len1, len2; > + rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); > + assert(rc == 0); > + rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); > + assert(rc == 0); > + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) > + return SELABEL_INCOMPARABLE; > + > + return SELABEL_EQUAL; > +} > + > +#endif > + > + > +struct regex_data * regex_data_create(void) { > + struct regex_data * dummy = (struct regex_data*) malloc( > + sizeof(struct regex_data)); > + if (dummy) { > + memset(dummy, 0, sizeof(struct regex_data)); > + } > + return dummy; > +} > + > +void regex_format_error(struct regex_error_data const * error_data, > + char * buffer, size_t buf_size) { > + unsigned the_end_length = buf_size > 4 ? 4 : buf_size; > + char * ptr = &buffer[buf_size - the_end_length]; > + int rc = 0; > + size_t pos = 0; > + if (!buffer || !buf_size) > + return; > + rc = snprintf(buffer, buf_size, "REGEX back-end error: "); > + if (rc < 0) > + /* > + * If snprintf fails it constitutes a logical error that needs > + * fixing. 
> + */ > + abort(); > + > + pos += rc; > + if (pos >= buf_size) > + goto truncated; > + > + if (error_data->error_offset > 0) { > +#ifdef USE_PCRE2 > + rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ", > + error_data->error_offset); > +#else > + rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ", > + error_data->error_offset); > +#endif > + if (rc < 0) > + abort(); > + > + } > + pos += rc; > + if (pos >= buf_size) > + goto truncated; > + > +#ifdef USE_PCRE2 > + rc = pcre2_get_error_message(error_data->error_code, > + (PCRE2_UCHAR*)(buffer + pos), > + buf_size - pos); > + if (rc == PCRE2_ERROR_NOMEMORY) > + goto truncated; > +#else > + rc = snprintf(buffer + pos, buf_size - pos, "%s", > + error_data->error_buffer); > + if (rc < 0) > + abort(); > + > + if ((size_t)rc < strlen(error_data->error_buffer)) > + goto truncated; > +#endif > + > + return; > + > +truncated: > + /* replace end of string with "..." to indicate that it was truncated */ > + switch (the_end_length) { > + /* no break statements, fall-through is intended */ > + case 4: > + *ptr++ = '.'; > + case 3: > + *ptr++ = '.'; > + case 2: > + *ptr++ = '.'; > + case 1: > + *ptr++ = '\0'; > + default: > + break; > + } > + return; > +} > diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h new file mode 100644 > index 0000000..6a113e8 > --- /dev/null > +++ b/libselinux/src/regex.h > @@ -0,0 +1,169 @@ > +#ifndef SRC_REGEX_H_ > +#define SRC_REGEX_H_ > + > +#include <stdio.h> > + > +#ifdef USE_PCRE2 > +#include <pcre2.h> > +#else > +#include <pcre.h> > +#endif > + > +enum { > + REGEX_MATCH, > + REGEX_MATCH_PARTIAL, > + REGEX_NO_MATCH, > + REGEX_ERROR = -1, > +}; > + > +#ifdef USE_PCRE2 > +struct regex_data { As far as I can tell, and downloading and applying patches is a PITA for me at the moment, This struct is not dereferenced outside of regex.c, so I would imagine that we would only Want a forward declaration in regex.h and the definition in regex.c. 
I don't see a point in Exporting the structure internals to the rest of the code base if they are not used, just Make it opaque, > + pcre2_code * regex; /* compiled regular expression */ > + pcre2_match_data * match_data; /* match data block required for the > compiled > + pattern in regex2 */ > +}; > + > +struct regex_error_data { > + int error_code; > + PCRE2_SIZE error_offset; > +}; > + > +/* ^^^^^^ USE_PCRE2 ^^^^^^ */ > +#else > +/* vvvvvv USE_PCRE vvvvvv */ > + > +/* Prior to version 8.20, libpcre did not have pcre_free_study() */ #if > +(PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) #define > +pcre_free_study pcre_free #endif > + > +struct regex_data { > + int owned; /* > + * non zero if regex and pcre_extra is owned by this > + * structure and thus must be freed on destruction. > + */ > + pcre *regex; /* compiled regular expression */ > + union { > + pcre_extra *sd; /* pointer to extra compiled stuff */ > + pcre_extra lsd; /* used to hold the mmap'd version */ > + }; > +}; > + > +struct regex_error_data { > + char const * error_buffer; > + int error_offset; > +}; > + > +#endif /* USE_PCRE2 */ > + > +struct mmap_area; > + > +/** > + * regex_verison returns the version string of the underlying regular > + * regular expressions library. In the case of PCRE it just returns the > + * result of pcre_version(). In the case of PCRE2, the very first time > +this > + * function is called it allocates a buffer large enough to hold the > +version > + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer. > + * The allocated buffer will linger in memory until the calling process > +is being > + * reaped. > + * > + * It may return NULL on error. > + */ > +char const * regex_version(void); > +/** > + * This constructor function allocates a buffer for a regex_data structure. > + * The buffer is being initialized with zeroes. 
> + */ > +struct regex_data * regex_data_create(void); > +/** > + * This complementary destructor function frees the a given regex_data buffer. > + * It also frees any non NULL member pointers with the appropriate > +pcreX_X_free > + * function. For PCRE this function respects the extra_owned field and > +frees > + * the pcre_extra data conditionally. Calling this function on a NULL > +pointer is > + * save. > + */ > +void regex_data_free(struct regex_data * regex); > +/** > + * This function compiles the regular expression. Additionally, it > +prepares > + * data structures required by the different underlying engines. For > +PCRE > + * it calls pcre_study to generate optional data required for optimized > + * execution of the compiled pattern. In the case of PCRE2, it > +allocates > + * a pcre2_match_data structure of appropriate size to hold all > +possible > + * matches created by the pattern. > + * > + * @arg regex If successful, the structure returned through *regex was > allocated > + * with regex_data_create and must be freed with regex_data_free. > + * @arg pattern_string The pattern string that is to be compiled. > + * @arg errordata A pointer to a regex_error_data structure must be passed > + * to this function. This structure depends on the underlying > + * implementation. It can be passed to regex_format_error > + * to generate a human readable error message. > + * @retval 0 on success > + * @retval -1 on error > + */ > +int regex_prepare_data(struct regex_data ** regex, char const * > pattern_string, > + struct regex_error_data * errordata); > +/** > + * This function loads a serialized precompiled pattern from a > +contiguous > + * data region given by map_area. > + * > + * @arg map_area Description of the memory region holding a serialized > + * representation of the precompiled pattern. > + * @arg regex If successful, the structure returned through *regex was > allocated > + * with regex_data_create and must be freed with regex_data_free. 
> + * > + * @retval 0 on success > + * @retval -1 on error > + */ > +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** > +regex); > +/** > + * This function stores a precompiled regular expression to a file. > + * In the case of PCRE, it just dumps the binary representation of the > + * precomplied pattern into a file. In the case of PCRE2, it uses the > + * serialization function provided by the library. > + * > + * @arg regex The precomplied regular expression data. > + * @arg fp A file stream specifying the output file. > + */ > +int regex_writef(struct regex_data * regex, FILE * fp); > +/** > + * This function applies a precompiled pattern to a subject string and > + * returns whether or not a match was found. > + * > + * @arg regex The precompiled pattern. > + * @arg subject The subject string. > + * @arg partial Boolean indicating if partial matches are wanted. A nonzero > + * value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as > + * option to pcre_exec of pcre2_match. > + * @retval REGEX_MATCH if a match was found > + * @retval REGEX_MATCH_PARTIAL if a partial match was found > + * @retval REGEX_NO_MATCH if no match was found > + * @retval REGEX_ERROR if an error was encountered during the execution of > the > + * regular expression > + */ > +int regex_match(struct regex_data * regex, char const * subject, int > +partial); > +/** > + * This function compares two compiled regular expressions (regex1 and > regex2). > + * It compares the binary representations of the compiled patterns. It > +is a very > + * crude approximation because the binary representation holds data > +like > + * reference counters, that has nothing to do with the actual state machine. 
> + * > + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly > + * the same > + * @retval SELABEL_INCOMPARABLE otherwise */ int regex_cmp(struct > +regex_data * regex1, struct regex_data * regex2); > +/** > + * This function takes the error data returned by regex_prepare_data > +and turns > + * it in to a human readable error message. > + * If the buffer given to hold the error message is to small it > +truncates the > + * message and indicates the truncation with an ellipsis ("...") at the > +end of > + * the buffer. > + * > + * @arg error_data Error data as returned by regex_prepare_data. > + * @arg buffer String buffer to hold the formated error string. > + * @arg buf_size Total size of the given bufer in bytes. > + */ > +void regex_format_error(struct regex_error_data const * error_data, > + char * buffer, size_t buf_size); > +#endif /* SRC_REGEX_H_ */ > diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile index > 8497cb4..1e7a048 100644 > --- a/libselinux/utils/Makefile > +++ b/libselinux/utils/Makefile > @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat- > security -Winit-self -Wmissi > -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \ > -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest- > attribute=const \ > -Werror -Wno-aggregate-return -Wno-redundant-decls -override CFLAGS > += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) > +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE > +$(EMFLAGS) $(PCRE_CFLAGS) > LDLIBS += -L../src -lselinux -L$(LIBDIR) > > TARGETS=$(patsubst %.c,%,$(wildcard *.c)) > > -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol > +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a > +-lsepol > > selinux_restorecon: LDLIBS += -lsepol > > diff --git a/libselinux/utils/sefcontext_compile.c > b/libselinux/utils/sefcontext_compile.c > index fd6fb78..b6b8d92 100644 > --- 
a/libselinux/utils/sefcontext_compile.c > +++ b/libselinux/utils/sefcontext_compile.c > @@ -1,6 +1,5 @@ > #include <ctype.h> > #include <errno.h> > -#include <pcre.h> > #include <stdint.h> > #include <stdio.h> > #include <string.h> > @@ -13,6 +12,7 @@ > #include <sepol/sepol.h> > > #include "../src/label_file.h" > +#include "../src/regex.h" > > const char *policy_file; > static int ctx_err; > @@ -101,6 +101,7 @@ static int write_binary_file(struct saved_data *data, int fd) > uint32_t section_len; > uint32_t i; > int rc; > + const char *reg_version; > > bin_file = fdopen(fd, "w"); > if (!bin_file) { > @@ -119,12 +120,15 @@ static int write_binary_file(struct saved_data *data, int > fd) > if (len != 1) > goto err; > > - /* write the pcre version */ > - section_len = strlen(pcre_version()); > + /* write version of the regex back-end */ > + reg_version = regex_version(); > + if (!reg_version) > + goto err; > + section_len = strlen(reg_version); > len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file); > if (len != 1) > goto err; > - len = fwrite(pcre_version(), sizeof(char), section_len, bin_file); > + len = fwrite(reg_version, sizeof(char), section_len, bin_file); > if (len != section_len) > goto err; > > @@ -162,10 +166,8 @@ static int write_binary_file(struct saved_data *data, int > fd) > mode_t mode = specs[i].mode; > size_t prefix_len = specs[i].prefix_len; > int32_t stem_id = specs[i].stem_id; > - pcre *re = specs[i].regex; > - pcre_extra *sd = get_pcre_extra(&specs[i]); > + struct regex_data *re = specs[i].regex; > uint32_t to_write; > - size_t size; > > /* length of the context string (including nul) */ > to_write = strlen(context) + 1; > @@ -212,42 +214,10 @@ static int write_binary_file(struct saved_data *data, int > fd) > if (len != 1) > goto err; > > - /* determine the size of the pcre data in bytes */ > - rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size); > + /* Write regex related data */ > + rc = regex_writef(re, bin_file); > if (rc < 0) > goto err; > -
> - /* write the number of bytes in the pcre data */ > - to_write = size; > - len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file); > - if (len != 1) > - goto err; > - > - /* write the actual pcre data as a char array */ > - len = fwrite(re, 1, to_write, bin_file); > - if (len != to_write) > - goto err; > - > - if (sd) { > - /* determine the size of the pcre study info */ > - rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size); > - if (rc < 0) > - goto err; > - } else > - size = 0; > - > - /* write the number of bytes in the pcre study data */ > - to_write = size; > - len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file); > - if (len != 1) > - goto err; > - > - if (sd) { > - /* write the actual pcre study data as a char array */ > - len = fwrite(sd->study_data, 1, to_write, bin_file); > - if (len != to_write) > - goto err; > - } > } > > rc = 0; > @@ -270,8 +240,7 @@ static void free_specs(struct saved_data *data) > free(specs[i].lr.ctx_trans); > free(specs[i].regex_str); > free(specs[i].type_str); > - pcre_free(specs[i].regex); > - pcre_free_study(specs[i].sd); > + regex_data_free(specs[i].regex); > } > free(specs); > > -- > 2.8.0.rc3.226.g39d4020
diff --git a/libselinux/Makefile b/libselinux/Makefile index 6142b60..15d051e 100644 --- a/libselinux/Makefile +++ b/libselinux/Makefile @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y) endif export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS +USE_PCRE2 ?= n +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n +ifeq ($(USE_PCRE2),y) + PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8 + ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y) + PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS + endif + PCRE_LDFLAGS := -lpcre2-8 +else + PCRE_LDFLAGS := -lpcre +endif +export PCRE_CFLAGS PCRE_LDFLAGS + all install relabel clean distclean indent: @for subdir in $(SUBDIRS); do \ (cd $$subdir && $(MAKE) $@) || exit 1; \ diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile index 37d01af..66687e6 100644 --- a/libselinux/src/Makefile +++ b/libselinux/src/Makefile @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \ -Werror -Wno-aggregate-return -Wno-redundant-decls -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS) SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \ -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations @@ -113,7 +113,7 @@ $(LIBA): $(OBJS) $(RANLIB) $@ $(LIBSO): $(LOBJS) - $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro + $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro ln -sf $@ $(TARGET) $(LIBPC): $(LIBPC).in ../VERSION diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c index c89bb35..e41c351 100644 --- a/libselinux/src/label_file.c +++ b/libselinux/src/label_file.c @@ -15,7 +15,6 @@ 
#include <errno.h> #include <limits.h> #include <stdint.h> -#include <pcre.h> #include <unistd.h> #include <sys/mman.h> #include <sys/types.h> @@ -112,6 +111,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path, struct mmap_area *mmap_area; uint32_t i, magic, version; uint32_t entry_len, stem_map_len, regex_array_len; + const char *reg_version; if (isbinary) { len = strlen(path); @@ -175,8 +175,13 @@ static int load_mmap(struct selabel_handle *rec, const char *path, if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS) return -1; + reg_version = regex_version(); + if (!reg_version) + return -1; + if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) { - len = strlen(pcre_version()); + + len = strlen(reg_version); rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); if (rc < 0) @@ -198,7 +203,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path, } str_buf[entry_len] = '\0'; - if ((strcmp(str_buf, pcre_version()) != 0)) { + if ((strcmp(str_buf, reg_version) != 0)) { free(str_buf); return -1; } @@ -278,7 +283,6 @@ static int load_mmap(struct selabel_handle *rec, const char *path, spec = &data->spec_arr[data->nspec]; spec->from_mmap = 1; - spec->regcomp = 1; /* Process context */ rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); @@ -364,47 +368,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path, spec->prefix_len = prefix_len; } - /* Process regex and study_data entries */ - rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); - if (rc < 0 || !entry_len) { - rc = -1; - goto err; - } - spec->regex = (pcre *)mmap_area->next_addr; - rc = next_entry(NULL, mmap_area, entry_len); + rc = regex_load_mmap(mmap_area, &spec->regex); if (rc < 0) goto err; - /* Check that regex lengths match. pcre_fullinfo() - * also validates its magic number. 
*/ - rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len); - if (rc < 0 || len != entry_len) { - rc = -1; - goto err; - } - - rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); - if (rc < 0 || !entry_len) { - rc = -1; - goto err; - } - - if (entry_len) { - spec->lsd.study_data = (void *)mmap_area->next_addr; - spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA; - rc = next_entry(NULL, mmap_area, entry_len); - if (rc < 0) - goto err; - - /* Check that study data lengths match. */ - rc = pcre_fullinfo(spec->regex, &spec->lsd, - PCRE_INFO_STUDYSIZE, &len); - if (rc < 0 || len != entry_len) { - rc = -1; - goto err; - } - } - data->nspec++; } @@ -605,14 +572,11 @@ static void closef(struct selabel_handle *rec) spec = &data->spec_arr[i]; free(spec->lr.ctx_trans); free(spec->lr.ctx_raw); + regex_data_free(spec->regex); if (spec->from_mmap) continue; free(spec->regex_str); free(spec->type_str); - if (spec->regcomp) { - pcre_free(spec->regex); - pcre_free_study(spec->sd); - } } for (i = 0; i < (unsigned int)data->num_stems; i++) { @@ -644,13 +608,14 @@ static struct spec *lookup_common(struct selabel_handle *rec, { struct saved_data *data = (struct saved_data *)rec->data; struct spec *spec_arr = data->spec_arr; - int i, rc, file_stem, pcre_options = 0; + int i, rc, file_stem; mode_t mode = (mode_t)type; const char *buf; struct spec *ret = NULL; char *clean_key = NULL; const char *prev_slash, *next_slash; unsigned int sofar = 0; + struct regex_error_data regex_error_data; if (!data->nspec) { errno = ENOENT; @@ -677,9 +642,6 @@ static struct spec *lookup_common(struct selabel_handle *rec, file_stem = find_stem_from_file(data, &buf); mode &= S_IFMT; - if (partial) - pcre_options |= PCRE_PARTIAL_SOFT; - /* * Check for matching specifications in reverse order, so that * the last matching specification is used. 
@@ -692,25 +654,19 @@ static struct spec *lookup_common(struct selabel_handle *rec, * a regex check */ if ((spec->stem_id == -1 || spec->stem_id == file_stem) && (!mode || !spec->mode || mode == spec->mode)) { - if (compile_regex(data, spec, NULL) < 0) + if (compile_regex(data, spec, &regex_error_data) < 0) goto finish; if (spec->stem_id == -1) - rc = pcre_exec(spec->regex, - get_pcre_extra(spec), - key, strlen(key), 0, - pcre_options, NULL, 0); + rc = regex_match(spec->regex, key, partial); else - rc = pcre_exec(spec->regex, - get_pcre_extra(spec), - buf, strlen(buf), 0, - pcre_options, NULL, 0); - if (rc == 0) { + rc = regex_match(spec->regex, buf, partial); + if (rc == REGEX_MATCH) { spec->matches++; break; - } else if (partial && rc == PCRE_ERROR_PARTIAL) + } else if (partial && rc == REGEX_MATCH_PARTIAL) break; - if (rc == PCRE_ERROR_NOMATCH) + if (rc == REGEX_NO_MATCH) continue; errno = ENOENT; @@ -849,17 +805,10 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1, continue; } - if (spec1->regcomp && spec2->regcomp) { - size_t len1, len2; - int rc; - - rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1); - assert(rc == 0); - rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2); - assert(rc == 0); - if (len1 != len2 || - memcmp(spec1->regex, spec2->regex, len1)) + if (spec1->regex && spec2->regex) { + if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){ return incomp(spec1, spec2, "regex", i, j); + } } else { if (strcmp(spec1->regex_str, spec2->regex_str)) return incomp(spec1, spec2, "regex_str", i, j); diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h index 6d1e890..24cb9e0 100644 --- a/libselinux/src/label_file.h +++ b/libselinux/src/label_file.h @@ -6,6 +6,14 @@ #include <sys/stat.h> +/* + * regex.h/c were introduced to hold all dependencies on the regular + * expression back-end when we started supporting PCRE2.
regex.h defines a + * minimal interface required by libselinux, so that the remaining code + * can be agnostic about the underlying implementation. + */ +#include "regex.h" + #include "callbacks.h" #include "label_internal.h" @@ -19,26 +27,16 @@ #define SELINUX_COMPILED_FCONTEXT_MAX_VERS SELINUX_COMPILED_FCONTEXT_PREFIX_LEN -/* Prior to version 8.20, libpcre did not have pcre_free_study() */ -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) -#define pcre_free_study pcre_free -#endif - /* A file security context specification. */ struct spec { struct selabel_lookup_rec lr; /* holds contexts for lookup result */ char *regex_str; /* regular expession string for diagnostics */ char *type_str; /* type string for diagnostic messages */ - pcre *regex; /* compiled regular expression */ - union { - pcre_extra *sd; /* pointer to extra compiled stuff */ - pcre_extra lsd; /* used to hold the mmap'd version */ - }; + struct regex_data * regex; /* backend dependent regular expression data */ mode_t mode; /* mode format value */ int matches; /* number of matching pathnames */ int stem_id; /* indicates which stem-compression item */ char hasMetaChars; /* regular expression has meta-chars */ - char regcomp; /* regex_str has been compiled to regex */ char from_mmap; /* this spec is from an mmap of the data */ size_t prefix_len; /* length of fixed path prefix */ }; @@ -78,17 +76,6 @@ struct saved_data { struct mmap_area *mmap_areas; }; -static inline pcre_extra *get_pcre_extra(struct spec *spec) -{ - if (spec->from_mmap) { - if (spec->lsd.study_data) - return &spec->lsd; - else - return NULL; - } else - return spec->sd; -} - static inline mode_t string_to_mode(char *mode) { size_t len; @@ -331,15 +318,14 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes) } static inline int compile_regex(struct saved_data *data, struct spec *spec, - const char **errbuf) + struct regex_error_data * error_data) { - const char *tmperrbuf; char *reg_buf, 
*anchored_regex, *cp; struct stem *stem_arr = data->stem_arr; size_t len; - int erroff; + int rc; - if (spec->regcomp) + if (spec->regex) return 0; /* already done */ /* Skip the fixed stem. */ @@ -361,25 +347,13 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec, *cp = '\0'; /* Compile the regular expression. */ - spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf, - &erroff, NULL); + rc = regex_prepare_data(&spec->regex, anchored_regex, error_data); free(anchored_regex); - if (!spec->regex) { - if (errbuf) - *errbuf = tmperrbuf; - return -1; - } - - spec->sd = pcre_study(spec->regex, 0, &tmperrbuf); - if (!spec->sd && tmperrbuf) { - if (errbuf) - *errbuf = tmperrbuf; + if (rc < 0) { return -1; } /* Done. */ - spec->regcomp = 1; - return 0; } @@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec, struct saved_data *data = (struct saved_data *)rec->data; struct spec *spec_arr; unsigned int nspec = data->nspec; - const char *errbuf = NULL; + char const *errbuf; + struct regex_error_data error_data; items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context); if (items < 0) { @@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec, data->nspec++; if (rec->validating && - compile_regex(data, &spec_arr[nspec], &errbuf)) { + compile_regex(data, &spec_arr[nspec], &error_data)) { COMPAT_LOG(SELINUX_ERROR, "%s: line %u has invalid regex %s: %s\n", path, lineno, regex, diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c new file mode 100644 index 0000000..558a72a --- /dev/null +++ b/libselinux/src/regex.c @@ -0,0 +1,461 @@ +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include "regex.h" +#include "label_file.h" + +#ifdef USE_PCRE2 +int regex_prepare_data(struct regex_data ** regex, + char const * pattern_string, + struct regex_error_data * errordata) { + memset(errordata, 0, sizeof(struct regex_error_data)); + + *regex =
regex_data_create(); + if (!(*regex)) + return -1; + + (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string, + PCRE2_ZERO_TERMINATED, + PCRE2_DOTALL, + &errordata->error_code, + &errordata->error_offset, NULL); + if (!(*regex)->regex) { + goto err; + } + + (*regex)->match_data = + pcre2_match_data_create_from_pattern((*regex)->regex, NULL); + if (!(*regex)->match_data) { + goto err; + } + return 0; + +err: regex_data_free(*regex); + *regex = NULL; + return -1; +} + +char const * regex_version(void) { + static char version_buf[256]; + size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL); + if (len <= 0 || len > sizeof(version_buf)) + return NULL; + + pcre2_config(PCRE2_CONFIG_VERSION, version_buf); + return version_buf; +} + +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { + int rc; + size_t entry_len; + + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); + if (rc < 0) + return -1; + + if (entry_len) { + /* + * this should yield exactly one because we store one pattern at a time + */ + rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr); + if (rc != 1) + return -1; + + *regex = regex_data_create(); + if (!*regex) + return -1; + + rc = pcre2_serialize_decode(&(*regex)->regex, 1, + (PCRE2_SPTR)mmap_area->next_addr, NULL); + if (rc != 1) + goto err; + + (*regex)->match_data = + pcre2_match_data_create_from_pattern((*regex)->regex, NULL); + if (!(*regex)->match_data) + goto err; + } + + /* and skip the decoded bit */ + rc = next_entry(NULL, mmap_area, entry_len); + if (rc < 0) + goto err; + + return 0; +err: + regex_data_free(*regex); + *regex = NULL; + return -1; +} + +int regex_writef(struct regex_data * regex, FILE * fp) { + int rc = 0; + size_t len; + PCRE2_SIZE to_write; + PCRE2_UCHAR * bytes = NULL; +#ifndef NO_PERSISTENTLY_STORED_PATTERNS + int do_write_patterns = 1; +#else + int do_write_patterns = 0; +#endif + + if (do_write_patterns) { + /* encode the patter for serialization */ + rc = 
pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1, + &bytes, &to_write, NULL); + if (rc != 1) { + rc = -1; + goto err; + } + } else { + to_write = 0; + } + + /* write serialized pattern's size */ + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); + if (len != 1) { + rc = -1; + goto err; + } + + if (do_write_patterns) { + /* write serialized pattern */ + len = fwrite(bytes, 1, to_write, fp); + if (len != to_write) { + rc = -1; + } + } + +err: + if (bytes) + pcre2_serialize_free(bytes); + + return rc; +} + +void regex_data_free(struct regex_data * regex) { + if (regex) { + if (regex->regex) { + pcre2_code_free(regex->regex); + } + if (regex->match_data) { + pcre2_match_data_free(regex->match_data); + } + free(regex); + } +} + +int regex_match(struct regex_data * regex, char const * subject, int partial) { + int rc; + rc = pcre2_match(regex->regex, + (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, + partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, + NULL); + if (rc > 0) + return REGEX_MATCH; + switch (rc) { + case PCRE2_ERROR_PARTIAL: + return REGEX_MATCH_PARTIAL; + case PCRE2_ERROR_NOMATCH: + return REGEX_NO_MATCH; + default: + return REGEX_ERROR; + } +} + +/* + * TODO Replace this compare function with something that actually compares the + * regular expressions. + * This compare function basically just compares the binary representations of + * the automatons, and because this representation contains pointers and + * metadata, it can only return a match if regex1 == regex2. + * Preferably, this function would be replaced with an algorithm that computes + * the equivalence of the automatons systematically.
+ */ +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { + int rc; + size_t len1, len2; + rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); + assert(rc == 0); + rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); + assert(rc == 0); + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) + return SELABEL_INCOMPARABLE; + + return SELABEL_EQUAL; +} + +#else // !USE_PCRE2 + +int regex_prepare_data(struct regex_data ** regex, + char const * pattern_string, + struct regex_error_data * errordata) { + memset(errordata, 0, sizeof(struct regex_error_data)); + + *regex = regex_data_create(); + if (!(*regex)) + return -1; + + (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL, + &errordata->error_buffer, + &errordata->error_offset, NULL); + if (!(*regex)->regex) { + goto err; + } + (*regex)->owned = 1; + + (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer); + if (!(*regex)->sd && errordata->error_buffer) { + goto err; + } + + return 0; + +err: regex_data_free(*regex); + *regex = NULL; + return -1; +} + +char const * regex_version(void) { + return pcre_version(); +} + +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { + int rc; + size_t entry_len, info_len; + + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); + if (rc < 0 || !entry_len) { + return -1; + } + *regex = regex_data_create(); + if (!(*regex)) + return -1; + + (*regex)->owned = 0; + (*regex)->regex = (pcre *) mmap_area->next_addr; + rc = next_entry(NULL, mmap_area, entry_len); + if (rc < 0) + goto err; + + /* + * Check that regex lengths match. pcre_fullinfo() + * also validates its magic number. 
+ */ + rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); + if (rc < 0 || info_len != entry_len) { + goto err; + } + + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); + if (rc < 0 || !entry_len) { + goto err; + } + + if (entry_len) { + (*regex)->lsd.study_data = (void *) mmap_area->next_addr; + (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; + rc = next_entry(NULL, mmap_area, entry_len); + if (rc < 0) + goto err; + + /* Check that study data lengths match. */ + rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, + PCRE_INFO_STUDYSIZE, &info_len); + if (rc < 0 || info_len != entry_len) + goto err; + } + return 0; + +err: + regex_data_free(*regex); + *regex = NULL; + return -1; +} + +int regex_writef(struct regex_data * regex, FILE * fp) { + int rc; + size_t len; + uint32_t to_write; + size_t size; + pcre_extra * sd = regex->owned ? regex->sd : + (regex->lsd.study_data ? &regex->lsd : NULL); + + /* determine the size of the pcre data in bytes */ + rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); + if (rc < 0) + return -1; + + /* write the number of bytes in the pcre data */ + to_write = size; + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); + if (len != 1) + return -1; + + /* write the actual pcre data as a char array */ + len = fwrite(regex->regex, 1, to_write, fp); + if (len != to_write) + return -1; + + if (sd) { + /* determine the size of the pcre study info */ + rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, + &size); + if (rc < 0) + return -1; + } else + size = 0; + + /* write the number of bytes in the pcre study data */ + to_write = size; + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); + if (len != 1) + return -1; + + if (sd) { + /* write the actual pcre study data as a char array */ + len = fwrite(sd->study_data, 1, to_write, fp); + if (len != to_write) + return -1; + } + + return 0; +} + +void regex_data_free(struct regex_data * regex) { + if (regex) { + if (regex->owned) { + if (regex->regex) +
pcre_free(regex->regex); + if (regex->sd) + pcre_free_study(regex->sd); + } + free(regex); + } +} + +int regex_match(struct regex_data * regex, char const * subject, int partial) { + int rc; + + rc = pcre_exec(regex->regex, regex->owned ? regex->sd : &regex->lsd, + subject, strlen(subject), 0, + partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0); + switch (rc) { + case 0: + return REGEX_MATCH; + case PCRE_ERROR_PARTIAL: + return REGEX_MATCH_PARTIAL; + case PCRE_ERROR_NOMATCH: + return REGEX_NO_MATCH; + default: + return REGEX_ERROR; + } +} + +/* + * TODO Replace this compare function with something that actually compares the + * regular expressions. + * This compare function basically just compares the binary representations of + * the automatons, and because this representation contains pointers and + * metadata, it can only return a match if regex1 == regex2. + * Preferably, this function would be replaced with an algorithm that computes + * the equivalence of the automatons systematically. + */ +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { + int rc; + size_t len1, len2; + rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); + assert(rc == 0); + rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); + assert(rc == 0); + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) + return SELABEL_INCOMPARABLE; + + return SELABEL_EQUAL; +} + +#endif + + +struct regex_data * regex_data_create(void) { + struct regex_data * dummy = (struct regex_data*) malloc( + sizeof(struct regex_data)); + if (dummy) { + memset(dummy, 0, sizeof(struct regex_data)); + } + return dummy; +} + +void regex_format_error(struct regex_error_data const * error_data, + char * buffer, size_t buf_size) { + unsigned the_end_length = buf_size > 4 ?
4 : buf_size; + char * ptr = &buffer[buf_size - the_end_length]; + int rc = 0; + size_t pos = 0; + if (!buffer || !buf_size) + return; + rc = snprintf(buffer, buf_size, "REGEX back-end error: "); + if (rc < 0) + /* + * If snprintf fails it constitutes a logical error that needs + * fixing. + */ + abort(); + + pos += rc; + if (pos >= buf_size) + goto truncated; + + if (error_data->error_offset > 0) { +#ifdef USE_PCRE2 + rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ", + error_data->error_offset); +#else + rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ", + error_data->error_offset); +#endif + if (rc < 0) + abort(); + + } + pos += rc; + if (pos >= buf_size) + goto truncated; + +#ifdef USE_PCRE2 + rc = pcre2_get_error_message(error_data->error_code, + (PCRE2_UCHAR*)(buffer + pos), + buf_size - pos); + if (rc == PCRE2_ERROR_NOMEMORY) + goto truncated; +#else + rc = snprintf(buffer + pos, buf_size - pos, "%s", + error_data->error_buffer); + if (rc < 0) + abort(); + + if ((size_t)rc < strlen(error_data->error_buffer)) + goto truncated; +#endif + + return; + +truncated: + /* replace end of string with "..." 
to indicate that it was truncated */ + switch (the_end_length) { + /* no break statements, fall-through is intended */ + case 4: + *ptr++ = '.'; + case 3: + *ptr++ = '.'; + case 2: + *ptr++ = '.'; + case 1: + *ptr++ = '\0'; + default: + break; + } + return; +} diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h new file mode 100644 index 0000000..6a113e8 --- /dev/null +++ b/libselinux/src/regex.h @@ -0,0 +1,169 @@ +#ifndef SRC_REGEX_H_ +#define SRC_REGEX_H_ + +#include <stdio.h> + +#ifdef USE_PCRE2 +#include <pcre2.h> +#else +#include <pcre.h> +#endif + +enum { + REGEX_MATCH, + REGEX_MATCH_PARTIAL, + REGEX_NO_MATCH, + REGEX_ERROR = -1, +}; + +#ifdef USE_PCRE2 +struct regex_data { + pcre2_code * regex; /* compiled regular expression */ + pcre2_match_data * match_data; /* match data block required for the compiled + pattern in regex2 */ +}; + +struct regex_error_data { + int error_code; + PCRE2_SIZE error_offset; +}; + +/* ^^^^^^ USE_PCRE2 ^^^^^^ */ +#else +/* vvvvvv USE_PCRE vvvvvv */ + +/* Prior to version 8.20, libpcre did not have pcre_free_study() */ +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) +#define pcre_free_study pcre_free +#endif + +struct regex_data { + int owned; /* + * non zero if regex and pcre_extra is owned by this + * structure and thus must be freed on destruction. + */ + pcre *regex; /* compiled regular expression */ + union { + pcre_extra *sd; /* pointer to extra compiled stuff */ + pcre_extra lsd; /* used to hold the mmap'd version */ + }; +}; + +struct regex_error_data { + char const * error_buffer; + int error_offset; +}; + +#endif /* USE_PCRE2 */ + +struct mmap_area; + +/** + * regex_verison returns the version string of the underlying regular + * regular expressions library. In the case of PCRE it just returns the + * result of pcre_version(). 
In the case of PCRE2, the very first time this + * function is called it allocates a buffer large enough to hold the version + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer. + * The allocated buffer will linger in memory until the calling process is being + * reaped. + * + * It may return NULL on error. + */ +char const * regex_version(void); +/** + * This constructor function allocates a buffer for a regex_data structure. + * The buffer is being initialized with zeroes. + */ +struct regex_data * regex_data_create(void); +/** + * This complementary destructor function frees the a given regex_data buffer. + * It also frees any non NULL member pointers with the appropriate pcreX_X_free + * function. For PCRE this function respects the extra_owned field and frees + * the pcre_extra data conditionally. Calling this function on a NULL pointer is + * save. + */ +void regex_data_free(struct regex_data * regex); +/** + * This function compiles the regular expression. Additionally, it prepares + * data structures required by the different underlying engines. For PCRE + * it calls pcre_study to generate optional data required for optimized + * execution of the compiled pattern. In the case of PCRE2, it allocates + * a pcre2_match_data structure of appropriate size to hold all possible + * matches created by the pattern. + * + * @arg regex If successful, the structure returned through *regex was allocated + * with regex_data_create and must be freed with regex_data_free. + * @arg pattern_string The pattern string that is to be compiled. + * @arg errordata A pointer to a regex_error_data structure must be passed + * to this function. This structure depends on the underlying + * implementation. It can be passed to regex_format_error + * to generate a human readable error message. 
+ * @retval 0 on success + * @retval -1 on error + */ +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string, + struct regex_error_data * errordata); +/** + * This function loads a serialized precompiled pattern from a contiguous + * data region given by map_area. + * + * @arg map_area Description of the memory region holding a serialized + * representation of the precompiled pattern. + * @arg regex If successful, the structure returned through *regex was allocated + * with regex_data_create and must be freed with regex_data_free. + * + * @retval 0 on success + * @retval -1 on error + */ +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex); +/** + * This function stores a precompiled regular expression to a file. + * In the case of PCRE, it just dumps the binary representation of the + * precomplied pattern into a file. In the case of PCRE2, it uses the + * serialization function provided by the library. + * + * @arg regex The precomplied regular expression data. + * @arg fp A file stream specifying the output file. + */ +int regex_writef(struct regex_data * regex, FILE * fp); +/** + * This function applies a precompiled pattern to a subject string and + * returns whether or not a match was found. + * + * @arg regex The precompiled pattern. + * @arg subject The subject string. + * @arg partial Boolean indicating if partial matches are wanted. A nonzero + * value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as + * option to pcre_exec of pcre2_match. + * @retval REGEX_MATCH if a match was found + * @retval REGEX_MATCH_PARTIAL if a partial match was found + * @retval REGEX_NO_MATCH if no match was found + * @retval REGEX_ERROR if an error was encountered during the execution of the + * regular expression + */ +int regex_match(struct regex_data * regex, char const * subject, int partial); +/** + * This function compares two compiled regular expressions (regex1 and regex2). 
+ * It compares the binary representations of the compiled patterns. It is a very + * crude approximation because the binary representation holds data like + * reference counters, that has nothing to do with the actual state machine. + * + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly + * the same + * @retval SELABEL_INCOMPARABLE otherwise + */ +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2); +/** + * This function takes the error data returned by regex_prepare_data and turns + * it in to a human readable error message. + * If the buffer given to hold the error message is to small it truncates the + * message and indicates the truncation with an ellipsis ("...") at the end of + * the buffer. + * + * @arg error_data Error data as returned by regex_prepare_data. + * @arg buffer String buffer to hold the formated error string. + * @arg buf_size Total size of the given bufer in bytes. + */ +void regex_format_error(struct regex_error_data const * error_data, + char * buffer, size_t buf_size); +#endif /* SRC_REGEX_H_ */ diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile index 8497cb4..1e7a048 100644 --- a/libselinux/utils/Makefile +++ b/libselinux/utils/Makefile @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \ -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \ -Werror -Wno-aggregate-return -Wno-redundant-decls -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS) LDLIBS += -L../src -lselinux -L$(LIBDIR) TARGETS=$(patsubst %.c,%,$(wildcard *.c)) -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol selinux_restorecon: LDLIBS += -lsepol diff --git 
a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c index fd6fb78..b6b8d92 100644 --- a/libselinux/utils/sefcontext_compile.c +++ b/libselinux/utils/sefcontext_compile.c @@ -1,6 +1,5 @@ #include <ctype.h> #include <errno.h> -#include <pcre.h> #include <stdint.h> #include <stdio.h> #include <string.h> @@ -13,6 +12,7 @@ #include <sepol/sepol.h> #include "../src/label_file.h" +#include "../src/regex.h" const char *policy_file; static int ctx_err; @@ -101,6 +101,7 @@ static int write_binary_file(struct saved_data *data, int fd) uint32_t section_len; uint32_t i; int rc; + const char *reg_version; bin_file = fdopen(fd, "w"); if (!bin_file) { @@ -119,12 +120,15 @@ static int write_binary_file(struct saved_data *data, int fd) if (len != 1) goto err; - /* write the pcre version */ - section_len = strlen(pcre_version()); + /* write version of the regex back-end */ + reg_version = regex_version(); + if (!reg_version) + goto err; + section_len = strlen(reg_version); len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file); if (len != 1) goto err; - len = fwrite(pcre_version(), sizeof(char), section_len, bin_file); + len = fwrite(reg_version, sizeof(char), section_len, bin_file); if (len != section_len) goto err; @@ -162,10 +166,8 @@ static int write_binary_file(struct saved_data *data, int fd) mode_t mode = specs[i].mode; size_t prefix_len = specs[i].prefix_len; int32_t stem_id = specs[i].stem_id; - pcre *re = specs[i].regex; - pcre_extra *sd = get_pcre_extra(&specs[i]); + struct regex_data *re = specs[i].regex; uint32_t to_write; - size_t size; /* length of the context string (including nul) */ to_write = strlen(context) + 1; @@ -212,42 +214,10 @@ static int write_binary_file(struct saved_data *data, int fd) if (len != 1) goto err; - /* determine the size of the pcre data in bytes */ - rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size); + /* Write regex related data */ + rc = regex_writef(re, bin_file); if (rc < 0) goto err; - - /* write the number
of bytes in the pcre data */ - to_write = size; - len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file); - if (len != 1) - goto err; - - /* write the actual pcre data as a char array */ - len = fwrite(re, 1, to_write, bin_file); - if (len != to_write) - goto err; - - if (sd) { - /* determine the size of the pcre study info */ - rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size); - if (rc < 0) - goto err; - } else - size = 0; - - /* write the number of bytes in the pcre study data */ - to_write = size; - len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file); - if (len != 1) - goto err; - - if (sd) { - /* write the actual pcre study data as a char array */ - len = fwrite(sd->study_data, 1, to_write, bin_file); - if (len != to_write) - goto err; - } } rc = 0; @@ -270,8 +240,7 @@ static void free_specs(struct saved_data *data) free(specs[i].lr.ctx_trans); free(specs[i].regex_str); free(specs[i].type_str); - pcre_free(specs[i].regex); - pcre_free_study(specs[i].sd); + regex_data_free(specs[i].regex); } free(specs);