Message ID | 20230112091408.2880781-1-inseob@google.com (mailing list archive) |
---|---|
State | Accepted, archived |
Delegated to: | Jason Zaman |
Headers | show |
Series | [v2,RESEND] libselinux: Workaround for heap overhead of pcre | expand |
On Thu, Jan 12, 2023 at 06:14:09PM +0900, Inseob Kim wrote: > pcre's behavior is changed so that pcre2_match always allocates heap for > match_data, rather than stack, regardless of size. The heap isn't freed > until explicitly calling pcre2_match_data_free. This new behavior may > result in heap overhead, which may increase the peak memory usage about > a few megabytes. It's because regex_match is first called for regex_data > objects, and then regex_data objects are freed at once. > > To workaround it, free match_data as soon as we call regex_match. It's > fine because libselinux currently doesn't use match_data, but use only > the return value. > > Signed-off-by: Inseob Kim <inseob@google.com> Acked-by: Jason Zaman <jason@perfinion.com> Merged, Thanks! > > --- > v2: > - add AGGRESSIVE_FREE_AFTER_REGEX_MATCH macro > - remove match_data from struct regex_data > --- > libselinux/src/regex.c | 32 +++++++++++++++++++++++++++++++- > 1 file changed, 31 insertions(+), 1 deletion(-) > > diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c > index 149a7973..4b4b9f08 100644 > --- a/libselinux/src/regex.c > +++ b/libselinux/src/regex.c > @@ -60,11 +60,13 @@ char const *regex_arch_string(void) > > struct regex_data { > pcre2_code *regex; /* compiled regular expression */ > +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH > /* > * match data block required for the compiled > * pattern in pcre2 > */ > pcre2_match_data *match_data; > +#endif > pthread_mutex_t match_mutex; > }; > > @@ -84,11 +86,13 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string, > goto err; > } > > +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH > (*regex)->match_data = > pcre2_match_data_create_from_pattern((*regex)->regex, NULL); > if (!(*regex)->match_data) { > goto err; > } > +#endif > return 0; > > err: > @@ -138,10 +142,12 @@ int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, > if (rc != 1) > goto err; > > +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH > (*regex)->match_data = > pcre2_match_data_create_from_pattern((*regex)->regex, NULL); > if (!(*regex)->match_data) > goto err; > +#endif > > *regex_compiled = true; > } > @@ -203,8 +209,12 @@ void regex_data_free(struct regex_data *regex) > if (regex) { > if (regex->regex) > pcre2_code_free(regex->regex); > + > +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH > if (regex->match_data) > pcre2_match_data_free(regex->match_data); > +#endif > + > __pthread_mutex_destroy(®ex->match_mutex); > free(regex); > } > @@ -213,10 +223,30 @@ void regex_data_free(struct regex_data *regex) > int regex_match(struct regex_data *regex, char const *subject, int partial) > { > int rc; > + pcre2_match_data *match_data; > __pthread_mutex_lock(®ex->match_mutex); > + > +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH > + match_data = pcre2_match_data_create_from_pattern( > + regex->regex, NULL); > + if (match_data == NULL) { > + __pthread_mutex_unlock(®ex->match_mutex); > + return REGEX_ERROR; > + } > +#else > + match_data = regex->match_data; > +#endif > + > rc = pcre2_match( > regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, > - partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL); > + partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL); > + > +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH > + // pcre2_match allocates heap and it won't be freed until > + // pcre2_match_data_free, resulting in heap overhead. > + pcre2_match_data_free(match_data); > +#endif > + > __pthread_mutex_unlock(®ex->match_mutex); > if (rc > 0) > return REGEX_MATCH; > -- > 2.39.0.314.g84b9a713c41-goog >
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c index 149a7973..4b4b9f08 100644 --- a/libselinux/src/regex.c +++ b/libselinux/src/regex.c @@ -60,11 +60,13 @@ char const *regex_arch_string(void) struct regex_data { pcre2_code *regex; /* compiled regular expression */ +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH /* * match data block required for the compiled * pattern in pcre2 */ pcre2_match_data *match_data; +#endif pthread_mutex_t match_mutex; }; @@ -84,11 +86,13 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string, goto err; } +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH (*regex)->match_data = pcre2_match_data_create_from_pattern((*regex)->regex, NULL); if (!(*regex)->match_data) { goto err; } +#endif return 0; err: @@ -138,10 +142,12 @@ int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, if (rc != 1) goto err; +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH (*regex)->match_data = pcre2_match_data_create_from_pattern((*regex)->regex, NULL); if (!(*regex)->match_data) goto err; +#endif *regex_compiled = true; } @@ -203,8 +209,12 @@ void regex_data_free(struct regex_data *regex) if (regex) { if (regex->regex) pcre2_code_free(regex->regex); + +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH if (regex->match_data) pcre2_match_data_free(regex->match_data); +#endif + __pthread_mutex_destroy(®ex->match_mutex); free(regex); } @@ -213,10 +223,30 @@ void regex_data_free(struct regex_data *regex) int regex_match(struct regex_data *regex, char const *subject, int partial) { int rc; + pcre2_match_data *match_data; __pthread_mutex_lock(®ex->match_mutex); + +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH + match_data = pcre2_match_data_create_from_pattern( + regex->regex, NULL); + if (match_data == NULL) { + __pthread_mutex_unlock(®ex->match_mutex); + return REGEX_ERROR; + } +#else + match_data = regex->match_data; +#endif + rc = pcre2_match( regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, - partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL); + partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL); + +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH + // pcre2_match allocates heap and it won't be freed until + // pcre2_match_data_free, resulting in heap overhead. + pcre2_match_data_free(match_data); +#endif + __pthread_mutex_unlock(®ex->match_mutex); if (rc > 0) return REGEX_MATCH;
pcre's behavior is changed so that pcre2_match always allocates heap for match_data, rather than stack, regardless of size. The heap isn't freed until explicitly calling pcre2_match_data_free. This new behavior may result in heap overhead, which may increase the peak memory usage about a few megabytes. It's because regex_match is first called for regex_data objects, and then regex_data objects are freed at once. To workaround it, free match_data as soon as we call regex_match. It's fine because libselinux currently doesn't use match_data, but use only the return value. Signed-off-by: Inseob Kim <inseob@google.com> --- v2: - add AGGRESSIVE_FREE_AFTER_REGEX_MATCH macro - remove match_data from struct regex_data --- libselinux/src/regex.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-)