diff mbox series

libselinux: avoid quadratic complexity for many regex specs validation

Message ID 20241230140323.58852-1-cgoettsche@seltendoof.de (mailing list archive)
State New
Delegated to: Petr Lautrbach
Headers show
Series libselinux: avoid quadratic complexity for many regex specs validation | expand

Commit Message

Christian Göttsche Dec. 30, 2024, 2:03 p.m. UTC
From: Christian Göttsche <cgzones@googlemail.com>

In the degenerate case of many regular expression specifications in a
single node switch to a n*log(n) algorithm (with allocation) instead of
the default n^2 (without allocation) one.

See 2c7b71db ("libselinux: performance optimization for duplicate detection")
for a predecessor.

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
---
 libselinux/src/label_file.c | 85 +++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 3 deletions(-)

Comments

James Carter Jan. 8, 2025, 9:50 p.m. UTC | #1
On Mon, Dec 30, 2024 at 9:03 AM Christian Göttsche
<cgoettsche@seltendoof.de> wrote:
>
> From: Christian Göttsche <cgzones@googlemail.com>
>
> In the degenerate case of many regular expression specifications in a
> single node switch to a n*log(n) algorithm (with allocation) instead of
> the default n^2 (without allocation) one.
>
> See 2c7b71db ("libselinux: performance optimization for duplicate detection")
> for a predecessor.
>
> Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
> ---
>  libselinux/src/label_file.c | 85 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 82 insertions(+), 3 deletions(-)
>
> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> index 56e20949..b1884e80 100644
> --- a/libselinux/src/label_file.c
> +++ b/libselinux/src/label_file.c
> @@ -102,6 +102,38 @@ void sort_spec_node(struct spec_node *node, struct spec_node *parent)
>                 sort_spec_node(&node->children[i], node);
>  }
>
> +static inline int compare_regex_spec(const void *p1, const void *p2)
> +{
> +       const struct regex_spec *r1 = p1;
> +       const struct regex_spec *r2 = p2;
> +       size_t regex_len1, regex_len2;
> +       int ret;
> +
> +       /* Order from high prefix length to low */
> +       ret = (r1->prefix_len < r2->prefix_len) - (r1->prefix_len > r2->prefix_len);
> +       if (ret)
> +               return ret;
> +
> +       /* Order from long total regex length to short */
> +       regex_len1 = strlen(r1->regex_str);
> +       regex_len2 = strlen(r2->regex_str);
> +       ret = (regex_len1 < regex_len2) - (regex_len1 > regex_len2);
> +       if (ret)
> +               return ret;
> +
> +       /*
> +        * Order for no-duplicates check.
> +        * Use reverse alphabetically order to retain the Fedora ordering of
> +        * `/usr/(.* /)?lib(/.*)?` before `/usr/(.* /)?bin(/.*)?`.
> +        */
> +       ret = strcmp(r1->regex_str, r2->regex_str);
> +       if (ret)
> +               return -ret;
> +
> +       /* Order wildcard mode (0) last */
> +       return (r1->file_kind < r2->file_kind) - (r1->file_kind > r2->file_kind);
> +}
> +

This function was just deleted when the fix was applied to restore
regex spec ordering. I am a little worried about breaking that
ordering again.

I am also wondering if this is addressing an actual problem. A quick
test on my system shows a max value for node->regex_specs_num of 347
and only eight instances of it being more than 100.

Thanks,
Jim

>  /*
>   * Warn about duplicate specifications.
>   */
> @@ -143,10 +175,18 @@ static int nodups_spec_node(const struct spec_node *node, const char *path)
>         }
>
>         if (node->regex_specs_num > 1) {
> -               for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
> -                       for (uint32_t j = i; j < node->regex_specs_num - 1; j++) {
> +               if (node->regex_specs_num > 512) {
> +                       uint32_t num = node->regex_specs_num;
> +                       struct regex_spec *copy = malloc(num * sizeof(struct regex_spec));
> +                       if (!copy)
> +                               goto default_algo;
> +
> +                       memcpy(copy, node->regex_specs, num * sizeof(struct regex_spec));
> +                       qsort(copy, num, sizeof(struct regex_spec), compare_regex_spec);
> +
> +                       for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
>                                 const struct regex_spec *node1 = &node->regex_specs[i];
> -                               const struct regex_spec *node2 = &node->regex_specs[j + 1];
> +                               const struct regex_spec *node2 = &node->regex_specs[i + 1];
>
>                                 if (node1->prefix_len != node2->prefix_len)
>                                         continue;
> @@ -177,6 +217,45 @@ static int nodups_spec_node(const struct spec_node *node, const char *path)
>                                                         node1->regex_str);
>                                 }
>                         }
> +
> +                       free(copy);
> +               } else {
> +                       default_algo:
> +                       for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
> +                               for (uint32_t j = i; j < node->regex_specs_num - 1; j++) {
> +                                       const struct regex_spec *node1 = &node->regex_specs[i];
> +                                       const struct regex_spec *node2 = &node->regex_specs[j + 1];
> +
> +                                       if (node1->prefix_len != node2->prefix_len)
> +                                               continue;
> +
> +                                       if (strcmp(node1->regex_str, node2->regex_str) != 0)
> +                                               continue;
> +
> +                                       if (node1->file_kind != LABEL_FILE_KIND_ALL && node2->file_kind != LABEL_FILE_KIND_ALL && node1->file_kind != node2->file_kind)
> +                                               continue;
> +
> +                                       rc = -1;
> +                                       errno = EINVAL;
> +                                       if (strcmp(node1->lr.ctx_raw, node2->lr.ctx_raw) != 0) {
> +                                               COMPAT_LOG
> +                                                       (SELINUX_ERROR,
> +                                                               "%s: Multiple different specifications for %s %s  (%s and %s).\n",
> +                                                               path,
> +                                                               file_kind_to_string(node1->file_kind),
> +                                                               node1->regex_str,
> +                                                               node1->lr.ctx_raw,
> +                                                               node2->lr.ctx_raw);
> +                                       } else {
> +                                               COMPAT_LOG
> +                                                       (SELINUX_ERROR,
> +                                                               "%s: Multiple same specifications for %s %s.\n",
> +                                                               path,
> +                                                               file_kind_to_string(node1->file_kind),
> +                                                               node1->regex_str);
> +                                       }
> +                               }
> +                       }
>                 }
>         }
>
> --
> 2.45.2
>
>
Christian Göttsche Jan. 14, 2025, 4:11 p.m. UTC | #2
On Wed, 8 Jan 2025 at 22:51, James Carter <jwcart2@gmail.com> wrote:
>
> On Mon, Dec 30, 2024 at 9:03 AM Christian Göttsche
> <cgoettsche@seltendoof.de> wrote:
> >
> > From: Christian Göttsche <cgzones@googlemail.com>
> >
> > In the degenerate case of many regular expression specifications in a
> > single node switch to a n*log(n) algorithm (with allocation) instead of
> > the default n^2 (without allocation) one.
> >
> > See 2c7b71db ("libselinux: performance optimization for duplicate detection")
> > for a predecessor.
> >
> > Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
> > ---
> >  libselinux/src/label_file.c | 85 +++++++++++++++++++++++++++++++++++--
> >  1 file changed, 82 insertions(+), 3 deletions(-)
> >
> > diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> > index 56e20949..b1884e80 100644
> > --- a/libselinux/src/label_file.c
> > +++ b/libselinux/src/label_file.c
> > @@ -102,6 +102,38 @@ void sort_spec_node(struct spec_node *node, struct spec_node *parent)
> >                 sort_spec_node(&node->children[i], node);
> >  }
> >
> > +static inline int compare_regex_spec(const void *p1, const void *p2)
> > +{
> > +       const struct regex_spec *r1 = p1;
> > +       const struct regex_spec *r2 = p2;
> > +       size_t regex_len1, regex_len2;
> > +       int ret;
> > +
> > +       /* Order from high prefix length to low */
> > +       ret = (r1->prefix_len < r2->prefix_len) - (r1->prefix_len > r2->prefix_len);
> > +       if (ret)
> > +               return ret;
> > +
> > +       /* Order from long total regex length to short */
> > +       regex_len1 = strlen(r1->regex_str);
> > +       regex_len2 = strlen(r2->regex_str);
> > +       ret = (regex_len1 < regex_len2) - (regex_len1 > regex_len2);
> > +       if (ret)
> > +               return ret;
> > +
> > +       /*
> > +        * Order for no-duplicates check.
> > +        * Use reverse alphabetically order to retain the Fedora ordering of
> > +        * `/usr/(.* /)?lib(/.*)?` before `/usr/(.* /)?bin(/.*)?`.
> > +        */
> > +       ret = strcmp(r1->regex_str, r2->regex_str);
> > +       if (ret)
> > +               return -ret;
> > +
> > +       /* Order wildcard mode (0) last */
> > +       return (r1->file_kind < r2->file_kind) - (r1->file_kind > r2->file_kind);
> > +}
> > +
>
> This function was just deleted when the fix was applied to restore
> regex spec ordering. I am a little worried about breaking that
> ordering again.

Only a temporary copied array is sorted, not the original data.

> I am also wondering if this is addressing an actual problem. A quick
> test on my system shows a max value for node->regex_specs_num of 347
> and only eight instances of it being more than 100.

True, but I tried to avoid reports like this one:
https://lore.kernel.org/selinux/20230209114253.120485-1-wanghuizhao1@huawei.com/

>
> Thanks,
> Jim
>
> >  /*
> >   * Warn about duplicate specifications.
> >   */
> > @@ -143,10 +175,18 @@ static int nodups_spec_node(const struct spec_node *node, const char *path)
> >         }
> >
> >         if (node->regex_specs_num > 1) {
> > -               for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
> > -                       for (uint32_t j = i; j < node->regex_specs_num - 1; j++) {
> > +               if (node->regex_specs_num > 512) {
> > +                       uint32_t num = node->regex_specs_num;
> > +                       struct regex_spec *copy = malloc(num * sizeof(struct regex_spec));
> > +                       if (!copy)
> > +                               goto default_algo;
> > +
> > +                       memcpy(copy, node->regex_specs, num * sizeof(struct regex_spec));
> > +                       qsort(copy, num, sizeof(struct regex_spec), compare_regex_spec);
> > +
> > +                       for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
> >                                 const struct regex_spec *node1 = &node->regex_specs[i];
> > -                               const struct regex_spec *node2 = &node->regex_specs[j + 1];
> > +                               const struct regex_spec *node2 = &node->regex_specs[i + 1];

This should be `&copy[i]` and `&copy[i + 1]` instead of `
&node->regex_specs[i]` and `&node->regex_specs[i + 1]`.

> >
> >                                 if (node1->prefix_len != node2->prefix_len)
> >                                         continue;
> > @@ -177,6 +217,45 @@ static int nodups_spec_node(const struct spec_node *node, const char *path)
> >                                                         node1->regex_str);
> >                                 }
> >                         }
> > +
> > +                       free(copy);
> > +               } else {
> > +                       default_algo:
> > +                       for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
> > +                               for (uint32_t j = i; j < node->regex_specs_num - 1; j++) {
> > +                                       const struct regex_spec *node1 = &node->regex_specs[i];
> > +                                       const struct regex_spec *node2 = &node->regex_specs[j + 1];
> > +
> > +                                       if (node1->prefix_len != node2->prefix_len)
> > +                                               continue;
> > +
> > +                                       if (strcmp(node1->regex_str, node2->regex_str) != 0)
> > +                                               continue;
> > +
> > +                                       if (node1->file_kind != LABEL_FILE_KIND_ALL && node2->file_kind != LABEL_FILE_KIND_ALL && node1->file_kind != node2->file_kind)
> > +                                               continue;
> > +
> > +                                       rc = -1;
> > +                                       errno = EINVAL;
> > +                                       if (strcmp(node1->lr.ctx_raw, node2->lr.ctx_raw) != 0) {
> > +                                               COMPAT_LOG
> > +                                                       (SELINUX_ERROR,
> > +                                                               "%s: Multiple different specifications for %s %s  (%s and %s).\n",
> > +                                                               path,
> > +                                                               file_kind_to_string(node1->file_kind),
> > +                                                               node1->regex_str,
> > +                                                               node1->lr.ctx_raw,
> > +                                                               node2->lr.ctx_raw);
> > +                                       } else {
> > +                                               COMPAT_LOG
> > +                                                       (SELINUX_ERROR,
> > +                                                               "%s: Multiple same specifications for %s %s.\n",
> > +                                                               path,
> > +                                                               file_kind_to_string(node1->file_kind),
> > +                                                               node1->regex_str);
> > +                                       }
> > +                               }
> > +                       }
> >                 }
> >         }
> >
> > --
> > 2.45.2
> >
> >
diff mbox series

Patch

diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 56e20949..b1884e80 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -102,6 +102,38 @@  void sort_spec_node(struct spec_node *node, struct spec_node *parent)
 		sort_spec_node(&node->children[i], node);
 }
 
+static inline int compare_regex_spec(const void *p1, const void *p2)
+{
+	const struct regex_spec *r1 = p1;
+	const struct regex_spec *r2 = p2;
+	size_t regex_len1, regex_len2;
+	int ret;
+
+	/* Order from high prefix length to low */
+	ret = (r1->prefix_len < r2->prefix_len) - (r1->prefix_len > r2->prefix_len);
+	if (ret)
+		return ret;
+
+	/* Order from long total regex length to short */
+	regex_len1 = strlen(r1->regex_str);
+	regex_len2 = strlen(r2->regex_str);
+	ret = (regex_len1 < regex_len2) - (regex_len1 > regex_len2);
+	if (ret)
+		return ret;
+
+	/*
+	 * Order for no-duplicates check.
+	 * Use reverse alphabetically order to retain the Fedora ordering of
+	 * `/usr/(.* /)?lib(/.*)?` before `/usr/(.* /)?bin(/.*)?`.
+	 */
+	ret = strcmp(r1->regex_str, r2->regex_str);
+	if (ret)
+		return -ret;
+
+	/* Order wildcard mode (0) last */
+	return (r1->file_kind < r2->file_kind) - (r1->file_kind > r2->file_kind);
+}
+
 /*
  * Warn about duplicate specifications.
  */
@@ -143,10 +175,18 @@  static int nodups_spec_node(const struct spec_node *node, const char *path)
 	}
 
 	if (node->regex_specs_num > 1) {
-		for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
-			for (uint32_t j = i; j < node->regex_specs_num - 1; j++) {
+		if (node->regex_specs_num > 512) {
+			uint32_t num = node->regex_specs_num;
+			struct regex_spec *copy = malloc(num * sizeof(struct regex_spec));
+			if (!copy)
+				goto default_algo;
+
+			memcpy(copy, node->regex_specs, num * sizeof(struct regex_spec));
+			qsort(copy, num, sizeof(struct regex_spec), compare_regex_spec);
+
+			for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
 				const struct regex_spec *node1 = &node->regex_specs[i];
-				const struct regex_spec *node2 = &node->regex_specs[j + 1];
+				const struct regex_spec *node2 = &node->regex_specs[i + 1];
 
 				if (node1->prefix_len != node2->prefix_len)
 					continue;
@@ -177,6 +217,45 @@  static int nodups_spec_node(const struct spec_node *node, const char *path)
 							node1->regex_str);
 				}
 			}
+
+			free(copy);
+		} else {
+			default_algo:
+			for (uint32_t i = 0; i < node->regex_specs_num - 1; i++) {
+				for (uint32_t j = i; j < node->regex_specs_num - 1; j++) {
+					const struct regex_spec *node1 = &node->regex_specs[i];
+					const struct regex_spec *node2 = &node->regex_specs[j + 1];
+
+					if (node1->prefix_len != node2->prefix_len)
+						continue;
+
+					if (strcmp(node1->regex_str, node2->regex_str) != 0)
+						continue;
+
+					if (node1->file_kind != LABEL_FILE_KIND_ALL && node2->file_kind != LABEL_FILE_KIND_ALL && node1->file_kind != node2->file_kind)
+						continue;
+
+					rc = -1;
+					errno = EINVAL;
+					if (strcmp(node1->lr.ctx_raw, node2->lr.ctx_raw) != 0) {
+						COMPAT_LOG
+							(SELINUX_ERROR,
+								"%s: Multiple different specifications for %s %s  (%s and %s).\n",
+								path,
+								file_kind_to_string(node1->file_kind),
+								node1->regex_str,
+								node1->lr.ctx_raw,
+								node2->lr.ctx_raw);
+					} else {
+						COMPAT_LOG
+							(SELINUX_ERROR,
+								"%s: Multiple same specifications for %s %s.\n",
+								path,
+								file_kind_to_string(node1->file_kind),
+								node1->regex_str);
+					}
+				}
+			}
 		}
 	}