diff mbox series

[v3,10/13] strmap: add a strset sub-type

Message ID 0f57735f5e30ad61a2e6fdb118067afbcea69660.1604343314.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Add struct strmap and associated utility functions | expand

Commit Message

Elijah Newren Nov. 2, 2020, 6:55 p.m. UTC
From: Elijah Newren <newren@gmail.com>

Similar to adding strintmap for special-casing a string -> int mapping,
add a strset type for cases where we really are only interested in using
strmap for storing a set rather than a mapping.  In this case, we'll
always just store NULL for the value but the different struct type makes
it clearer than code comments how a variable is intended to be used.

The difference in usage also results in some differences in API: a few
things that aren't necessary or meaningful are dropped (namely, the
free_values argument to *_clear(), and the *_get() function), and
strset_add() is chosen as the API instead of strset_put().

Finally, shortlog already had a more minimal strset API; so this adds a
strset_check_and_add() function for its benefit to allow it to switch
over to this strset implementation.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 strmap.c |  8 +++++++
 strmap.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

Comments

Jeff King Nov. 4, 2020, 8:31 p.m. UTC | #1
On Mon, Nov 02, 2020 at 06:55:10PM +0000, Elijah Newren via GitGitGadget wrote:

> +int strset_check_and_add(struct strset *set, const char *str)
> +{
> +	if (strset_contains(set, str))
> +		return 1;
> +	strset_add(set, str);
> +	return 0;
> +}

With this implementation, I wonder if it is worth having such a
specialized function. The value of an atomic check-and-add operation is
that it can reuse the effort to hash the string for both operations (it
could also reuse any open-table probing effort, but for a chained hash
like our implementation, it's cheap to add a new entry to the front of
the list).

I doubt it matters all that much for the use case in shortlog. Perhaps
we should just open-code it there for now, and we can revisit it if
another user comes up.

> --- a/strmap.h
> +++ b/strmap.h
> @@ -28,6 +28,10 @@ int cmp_strmap_entry(const void *hashmap_cmp_fn_data,
>  			.map.strdup_strings = 1,                          \
>  			.default_value = 0,                               \
>  		    }
> +#define STRSET_INIT { \
> +			.map.map = HASHMAP_INIT(cmp_strmap_entry, NULL),  \
> +			.map.strdup_strings = 1,                          \
> +		    }

As with strint, this could be:

  #define STRSET_INIT { .map = STRMAP_INIT }

-Peff
diff mbox series

Patch

diff --git a/strmap.c b/strmap.c
index 0d10a884b5..2aff985f40 100644
--- a/strmap.c
+++ b/strmap.c
@@ -134,3 +134,11 @@  void strintmap_incr(struct strintmap *map, const char *str, intptr_t amt)
 	else
 		strintmap_set(map, str, map->default_value + amt);
 }
+
+int strset_check_and_add(struct strset *set, const char *str)
+{
+	if (strset_contains(set, str))
+		return 1; /* already a member; the set is not modified */
+	strset_add(set, str); /* separate call, not fused with the check */
+	return 0; /* str was absent and has now been added */
+}
diff --git a/strmap.h b/strmap.h
index 31474f781e..fca1e9f639 100644
--- a/strmap.h
+++ b/strmap.h
@@ -28,6 +28,10 @@  int cmp_strmap_entry(const void *hashmap_cmp_fn_data,
 			.map.strdup_strings = 1,                          \
 			.default_value = 0,                               \
 		    }
+#define STRSET_INIT { \
+			.map.map = HASHMAP_INIT(cmp_strmap_entry, NULL),  \
+			.map.strdup_strings = 1,                          \
+		    }
 
 /*
  * Initialize the members of the strmap.  Any keys added to the strmap will
@@ -200,4 +204,71 @@  static inline void strintmap_set(struct strintmap *map, const char *str,
  */
 void strintmap_incr(struct strintmap *map, const char *str, intptr_t amt);
 
+/*
+ * strset:
+ *    A set of strings.
+ *
+ * Primary differences with strmap:
+ *    1) The value is always NULL, and ignored.  As there is no value to free,
+ *       there is one fewer argument to strset_clear().
+ *    2) No strset_get() because there is no value.
+ *    3) No strset_put(); use strset_add() instead.
+ */
+
+struct strset {
+	struct strmap map; /* backing strmap; only keys matter, values stay NULL */
+};
+
+#define strset_for_each_entry(mystrset, iter, var)	\
+	strmap_for_each_entry(&(mystrset)->map, iter, var)
+
+static inline void strset_init(struct strset *set)
+{
+	strmap_init(&set->map); /* initializes the embedded strmap; no options */
+}
+
+static inline void strset_init_with_options(struct strset *set,
+					    int strdup_strings)
+{
+	strmap_init_with_options(&set->map, strdup_strings); /* flag forwarded unchanged */
+}
+
+static inline void strset_clear(struct strset *set)
+{
+	strmap_clear(&set->map, 0); /* 0: values are always NULL, nothing to free */
+}
+
+static inline void strset_partial_clear(struct strset *set)
+{
+	strmap_partial_clear(&set->map, 0); /* 0: presumably "no values to free" */
+}
+
+static inline int strset_contains(struct strset *set, const char *str)
+{
+	return strmap_contains(&set->map, str); /* nonzero means str is a member */
+}
+
+static inline void strset_remove(struct strset *set, const char *str)
+{
+	strmap_remove(&set->map, str, 0); /* 0: presumably "no value to free" */
+}
+
+static inline int strset_empty(struct strset *set)
+{
+	return strmap_empty(&set->map); /* presumably nonzero when no strings are stored */
+}
+
+static inline unsigned int strset_get_size(struct strset *set)
+{
+	return strmap_get_size(&set->map); /* size as reported by the backing strmap */
+}
+
+static inline void strset_add(struct strset *set, const char *str)
+{
+	strmap_put(&set->map, str, NULL); /* value is always NULL; any result is unused */
+}
+
+/* Returns 1 if str is already in the set.  Otherwise adds str and returns 0. */
+int strset_check_and_add(struct strset *set, const char *str);
+
 #endif /* STRMAP_H */