diff mbox series

[v2,1/4] uuid: add hash_func and equal_func

Message ID 20230518120258.1394135-2-aesteve@redhat.com (mailing list archive)
State New, archived
Headers show
Series Virtio shared dma-buf | expand

Commit Message

Albert Esteve May 18, 2023, 12:02 p.m. UTC
Add hash and an equal function to uuid module.

Add a couple simple unit tests for new functions,
checking collisions for similar UUIDs in the case
of the hash function, and comparing generated UUIDs
for the equal function.

Signed-off-by: Albert Esteve <aesteve@redhat.com>
---
 include/qemu/uuid.h    |  4 ++++
 tests/unit/test-uuid.c | 46 ++++++++++++++++++++++++++++++++++++++++++
 util/uuid.c            | 38 ++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

Comments

Marc-André Lureau May 20, 2023, 7:23 a.m. UTC | #1
Hi

On Thu, May 18, 2023 at 4:03 PM Albert Esteve <aesteve@redhat.com> wrote:

> Add hash and an equal function to uuid module.
>
> Add a couple simple unit tests for new functions,
> checking collisions for similar UUIDs in the case
> of the hash function, and comparing generated UUIDs
> for the equal function.
>
> Signed-off-by: Albert Esteve <aesteve@redhat.com>
> ---
>  include/qemu/uuid.h    |  4 ++++
>  tests/unit/test-uuid.c | 46 ++++++++++++++++++++++++++++++++++++++++++
>  util/uuid.c            | 38 ++++++++++++++++++++++++++++++++++
>  3 files changed, 88 insertions(+)
>
> diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
> index dc40ee1fc9..136df682c9 100644
> --- a/include/qemu/uuid.h
> +++ b/include/qemu/uuid.h
> @@ -96,4 +96,8 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid);
>
>  QemuUUID qemu_uuid_bswap(QemuUUID uuid);
>
> +uint32_t qemu_uuid_hash(const void *uuid);
> +
> +int qemu_uuid_equal(const void *lhv, const void *rhv);
>

There is already qemu_uuid_is_equal()


> +
>  #endif
> diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
> index c111de5fc1..8c865869d5 100644
> --- a/tests/unit/test-uuid.c
> +++ b/tests/unit/test-uuid.c
> @@ -171,6 +171,50 @@ static void test_uuid_unparse_strdup(void)
>      }
>  }
>
> +static void test_uuid_hash(void)
> +{
> +    QemuUUID uuid;
> +    int i;
> +
> +    for (i = 0; i < 100; i++) {
> +        qemu_uuid_generate(&uuid);
> +        /* Obtain the UUID hash */
> +        uint32_t hash_a = qemu_uuid_hash(&uuid);
> +        int data_idx = g_random_int_range(0, 15);
> +        /* Change a single random byte of the UUID */
> +        if (uuid.data[data_idx] < 0xFF) {
> +            uuid.data[data_idx]++;
> +        } else {
> +            uuid.data[data_idx]--;
> +        }
> +        /* Obtain the UUID hash again */
> +        uint32_t hash_b = qemu_uuid_hash(&uuid);
> +        /*
> +         * Both hashes shall be different (avoid collision)
> +         * for any change in the UUID fields
> +         */
> +        g_assert_cmpint(hash_a, !=, hash_b);
> +    }
> +}
> +
> +static void test_uuid_equal(void)
> +{
> +    QemuUUID uuid_a, uuid_b, uuid_c;
> +    int i;
> +
> +    for (i = 0; i < 100; i++) {
> +        qemu_uuid_generate(&uuid_a);
> +        qemu_uuid_generate(&uuid_b);
> +        memcpy(&uuid_c, &uuid_a, sizeof(uuid_a));
> +
> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_a));
> +        g_assert(qemu_uuid_equal(&uuid_b, &uuid_b));
> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_c));
> +        g_assert_false(qemu_uuid_equal(&uuid_a, &uuid_b));
> +        g_assert_false(qemu_uuid_equal(NULL, NULL));
> +    }
> +}
> +
>  int main(int argc, char **argv)
>  {
>      g_test_init(&argc, &argv, NULL);
> @@ -179,6 +223,8 @@ int main(int argc, char **argv)
>      g_test_add_func("/uuid/parse", test_uuid_parse);
>      g_test_add_func("/uuid/unparse", test_uuid_unparse);
>      g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup);
> +    g_test_add_func("/uuid/hash", test_uuid_hash);
> +    g_test_add_func("/uuid/equal", test_uuid_equal);
>
>      return g_test_run();
>  }
> diff --git a/util/uuid.c b/util/uuid.c
> index b1108dde78..efa9b0a0e4 100644
> --- a/util/uuid.c
> +++ b/util/uuid.c
> @@ -16,6 +16,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/uuid.h"
>  #include "qemu/bswap.h"
> +#include "qemu/xxhash.h"
>
>  void qemu_uuid_generate(QemuUUID *uuid)
>  {
> @@ -116,3 +117,40 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid)
>      bswap16s(&uuid.fields.time_high_and_version);
>      return uuid;
>  }
> +
> +uint32_t qemu_uuid_hash(const void *uuid)
> +{
> +    QemuUUID *id = (QemuUUID *) uuid;
> +    uint64_t ab = (id->fields.time_low) |
> +                  (((uint64_t) id->fields.time_mid) << 32) |
> +                  (((uint64_t) id->fields.time_high_and_version) << 48);
> +    uint64_t cd = (id->fields.clock_seq_and_reserved) |
> +                  (id->fields.clock_seq_low << 8);
> +    int i = 0, shift = 8;
> +
> +    for (; i < 6; i++) {
> +        shift += 8;
> +        cd |= ((uint64_t) id->fields.node[i]) << shift;
> +    }
> +
> +    return qemu_xxhash4(ab, cd);
> +
>

That looks quite complex, and I have no idea if this is a good hash or not.

Instead I would implement the traditional "djb" hash over the char[16] data
(see g_str_hash implementation for \0-terminated implementation)


> }
> +
> +int qemu_uuid_equal(const void *lhv, const void *rhv)
> +{
> +    int i;
> +    QemuUUID *lid = (QemuUUID *) lhv;
> +    QemuUUID *rid = (QemuUUID *) rhv;
> +    if (lid == NULL || rid == NULL) {
> +        return 0;
> +    }
> +    if (lid == rid) {
> +        return 1;
> +    }
> +    for (i = 0; i < 16; i++) {
> +        if (lid->data[i] != rid->data[i]) {
> +            return 0;
> +        }
> +    }
> +    return 1;
> +}
> --
> 2.40.0
>
>
Albert Esteve May 22, 2023, 1:13 p.m. UTC | #2
On Sat, May 20, 2023 at 9:36 AM Marc-André Lureau <
marcandre.lureau@gmail.com> wrote:

> Hi
>
> On Thu, May 18, 2023 at 4:03 PM Albert Esteve <aesteve@redhat.com> wrote:
>
>> Add hash and an equal function to uuid module.
>>
>> Add a couple simple unit tests for new functions,
>> checking collisions for similar UUIDs in the case
>> of the hash function, and comparing generated UUIDs
>> for the equal function.
>>
>> Signed-off-by: Albert Esteve <aesteve@redhat.com>
>> ---
>>  include/qemu/uuid.h    |  4 ++++
>>  tests/unit/test-uuid.c | 46 ++++++++++++++++++++++++++++++++++++++++++
>>  util/uuid.c            | 38 ++++++++++++++++++++++++++++++++++
>>  3 files changed, 88 insertions(+)
>>
>> diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
>> index dc40ee1fc9..136df682c9 100644
>> --- a/include/qemu/uuid.h
>> +++ b/include/qemu/uuid.h
>> @@ -96,4 +96,8 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid);
>>
>>  QemuUUID qemu_uuid_bswap(QemuUUID uuid);
>>
>> +uint32_t qemu_uuid_hash(const void *uuid);
>> +
>> +int qemu_uuid_equal(const void *lhv, const void *rhv);
>>
>
> There is already qemu_uuid_is_equal()
>
>

Agh, true. I'll remove it. Not sure why my brain ignored it as I was
reading the code...


> +
>>  #endif
>> diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
>> index c111de5fc1..8c865869d5 100644
>> --- a/tests/unit/test-uuid.c
>> +++ b/tests/unit/test-uuid.c
>> @@ -171,6 +171,50 @@ static void test_uuid_unparse_strdup(void)
>>      }
>>  }
>>
>> +static void test_uuid_hash(void)
>> +{
>> +    QemuUUID uuid;
>> +    int i;
>> +
>> +    for (i = 0; i < 100; i++) {
>> +        qemu_uuid_generate(&uuid);
>> +        /* Obtain the UUID hash */
>> +        uint32_t hash_a = qemu_uuid_hash(&uuid);
>> +        int data_idx = g_random_int_range(0, 15);
>> +        /* Change a single random byte of the UUID */
>> +        if (uuid.data[data_idx] < 0xFF) {
>> +            uuid.data[data_idx]++;
>> +        } else {
>> +            uuid.data[data_idx]--;
>> +        }
>> +        /* Obtain the UUID hash again */
>> +        uint32_t hash_b = qemu_uuid_hash(&uuid);
>> +        /*
>> +         * Both hashes shall be different (avoid collision)
>> +         * for any change in the UUID fields
>> +         */
>> +        g_assert_cmpint(hash_a, !=, hash_b);
>> +    }
>> +}
>> +
>> +static void test_uuid_equal(void)
>> +{
>> +    QemuUUID uuid_a, uuid_b, uuid_c;
>> +    int i;
>> +
>> +    for (i = 0; i < 100; i++) {
>> +        qemu_uuid_generate(&uuid_a);
>> +        qemu_uuid_generate(&uuid_b);
>> +        memcpy(&uuid_c, &uuid_a, sizeof(uuid_a));
>> +
>> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_a));
>> +        g_assert(qemu_uuid_equal(&uuid_b, &uuid_b));
>> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_c));
>> +        g_assert_false(qemu_uuid_equal(&uuid_a, &uuid_b));
>> +        g_assert_false(qemu_uuid_equal(NULL, NULL));
>> +    }
>> +}
>> +
>>  int main(int argc, char **argv)
>>  {
>>      g_test_init(&argc, &argv, NULL);
>> @@ -179,6 +223,8 @@ int main(int argc, char **argv)
>>      g_test_add_func("/uuid/parse", test_uuid_parse);
>>      g_test_add_func("/uuid/unparse", test_uuid_unparse);
>>      g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup);
>> +    g_test_add_func("/uuid/hash", test_uuid_hash);
>> +    g_test_add_func("/uuid/equal", test_uuid_equal);
>>
>>      return g_test_run();
>>  }
>> diff --git a/util/uuid.c b/util/uuid.c
>> index b1108dde78..efa9b0a0e4 100644
>> --- a/util/uuid.c
>> +++ b/util/uuid.c
>> @@ -16,6 +16,7 @@
>>  #include "qemu/osdep.h"
>>  #include "qemu/uuid.h"
>>  #include "qemu/bswap.h"
>> +#include "qemu/xxhash.h"
>>
>>  void qemu_uuid_generate(QemuUUID *uuid)
>>  {
>> @@ -116,3 +117,40 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid)
>>      bswap16s(&uuid.fields.time_high_and_version);
>>      return uuid;
>>  }
>> +
>> +uint32_t qemu_uuid_hash(const void *uuid)
>> +{
>> +    QemuUUID *id = (QemuUUID *) uuid;
>> +    uint64_t ab = (id->fields.time_low) |
>> +                  (((uint64_t) id->fields.time_mid) << 32) |
>> +                  (((uint64_t) id->fields.time_high_and_version) << 48);
>> +    uint64_t cd = (id->fields.clock_seq_and_reserved) |
>> +                  (id->fields.clock_seq_low << 8);
>> +    int i = 0, shift = 8;
>> +
>> +    for (; i < 6; i++) {
>> +        shift += 8;
>> +        cd |= ((uint64_t) id->fields.node[i]) << shift;
>> +    }
>> +
>> +    return qemu_xxhash4(ab, cd);
>> +
>>
>
> That looks quite complex, and I have no idea if this is a good hash or not.
>
> Instead I would implement the traditional "djb" hash over the char[16]
> data (see g_str_hash implementation for \0-terminated implementation)
>

ok, I'll try to do something like that. Thanks for the suggestion.

I looked for any hash library within qemu code and xxhash was one of the
options that seemed easier to use.


>
>
>> }
>> +
>> +int qemu_uuid_equal(const void *lhv, const void *rhv)
>> +{
>> +    int i;
>> +    QemuUUID *lid = (QemuUUID *) lhv;
>> +    QemuUUID *rid = (QemuUUID *) rhv;
>> +    if (lid == NULL || rid == NULL) {
>> +        return 0;
>> +    }
>> +    if (lid == rid) {
>> +        return 1;
>> +    }
>> +    for (i = 0; i < 16; i++) {
>> +        if (lid->data[i] != rid->data[i]) {
>> +            return 0;
>> +        }
>> +    }
>> +    return 1;
>> +}
>> --
>> 2.40.0
>>
>>
>
> --
> Marc-André Lureau
>
Albert Esteve May 22, 2023, 1:41 p.m. UTC | #3
On Mon, May 22, 2023 at 3:13 PM Albert Esteve <aesteve@redhat.com> wrote:

>
>
>
> On Sat, May 20, 2023 at 9:36 AM Marc-André Lureau <
> marcandre.lureau@gmail.com> wrote:
>
>> Hi
>>
>> On Thu, May 18, 2023 at 4:03 PM Albert Esteve <aesteve@redhat.com> wrote:
>>
>>> Add hash and an equal function to uuid module.
>>>
>>> Add a couple simple unit tests for new functions,
>>> checking collisions for similar UUIDs in the case
>>> of the hash function, and comparing generated UUIDs
>>> for the equal function.
>>>
>>> Signed-off-by: Albert Esteve <aesteve@redhat.com>
>>> ---
>>>  include/qemu/uuid.h    |  4 ++++
>>>  tests/unit/test-uuid.c | 46 ++++++++++++++++++++++++++++++++++++++++++
>>>  util/uuid.c            | 38 ++++++++++++++++++++++++++++++++++
>>>  3 files changed, 88 insertions(+)
>>>
>>> diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
>>> index dc40ee1fc9..136df682c9 100644
>>> --- a/include/qemu/uuid.h
>>> +++ b/include/qemu/uuid.h
>>> @@ -96,4 +96,8 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid);
>>>
>>>  QemuUUID qemu_uuid_bswap(QemuUUID uuid);
>>>
>>> +uint32_t qemu_uuid_hash(const void *uuid);
>>> +
>>> +int qemu_uuid_equal(const void *lhv, const void *rhv);
>>>
>>
>> There is already qemu_uuid_is_equal()
>>
>>
>
> Agh, true. I'll remove it. Not sure why my brain ignored it as I was
> reading the code...
>

One thing to consider here is that the function signature, if we want to
pass it as a parameter for g_hash_table_new,
expects void pointers whereas qemu_uuid_is_equal() takes QemuUUID pointers.

How would you suggest proceeding here? Would be better to "overload" (or
wrap) a call to qemu_uuid_equal() from
another function that matches the expected signature?  (int
qemu_uuid_is_equal(void*, void*) is not a good name in that case,
it should be something that highlights the difference between the two in
the name) Or should I change the current existing
function signature?


>
>> +
>>>  #endif
>>> diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
>>> index c111de5fc1..8c865869d5 100644
>>> --- a/tests/unit/test-uuid.c
>>> +++ b/tests/unit/test-uuid.c
>>> @@ -171,6 +171,50 @@ static void test_uuid_unparse_strdup(void)
>>>      }
>>>  }
>>>
>>> +static void test_uuid_hash(void)
>>> +{
>>> +    QemuUUID uuid;
>>> +    int i;
>>> +
>>> +    for (i = 0; i < 100; i++) {
>>> +        qemu_uuid_generate(&uuid);
>>> +        /* Obtain the UUID hash */
>>> +        uint32_t hash_a = qemu_uuid_hash(&uuid);
>>> +        int data_idx = g_random_int_range(0, 15);
>>> +        /* Change a single random byte of the UUID */
>>> +        if (uuid.data[data_idx] < 0xFF) {
>>> +            uuid.data[data_idx]++;
>>> +        } else {
>>> +            uuid.data[data_idx]--;
>>> +        }
>>> +        /* Obtain the UUID hash again */
>>> +        uint32_t hash_b = qemu_uuid_hash(&uuid);
>>> +        /*
>>> +         * Both hashes shall be different (avoid collision)
>>> +         * for any change in the UUID fields
>>> +         */
>>> +        g_assert_cmpint(hash_a, !=, hash_b);
>>> +    }
>>> +}
>>> +
>>> +static void test_uuid_equal(void)
>>> +{
>>> +    QemuUUID uuid_a, uuid_b, uuid_c;
>>> +    int i;
>>> +
>>> +    for (i = 0; i < 100; i++) {
>>> +        qemu_uuid_generate(&uuid_a);
>>> +        qemu_uuid_generate(&uuid_b);
>>> +        memcpy(&uuid_c, &uuid_a, sizeof(uuid_a));
>>> +
>>> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_a));
>>> +        g_assert(qemu_uuid_equal(&uuid_b, &uuid_b));
>>> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_c));
>>> +        g_assert_false(qemu_uuid_equal(&uuid_a, &uuid_b));
>>> +        g_assert_false(qemu_uuid_equal(NULL, NULL));
>>> +    }
>>> +}
>>> +
>>>  int main(int argc, char **argv)
>>>  {
>>>      g_test_init(&argc, &argv, NULL);
>>> @@ -179,6 +223,8 @@ int main(int argc, char **argv)
>>>      g_test_add_func("/uuid/parse", test_uuid_parse);
>>>      g_test_add_func("/uuid/unparse", test_uuid_unparse);
>>>      g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup);
>>> +    g_test_add_func("/uuid/hash", test_uuid_hash);
>>> +    g_test_add_func("/uuid/equal", test_uuid_equal);
>>>
>>>      return g_test_run();
>>>  }
>>> diff --git a/util/uuid.c b/util/uuid.c
>>> index b1108dde78..efa9b0a0e4 100644
>>> --- a/util/uuid.c
>>> +++ b/util/uuid.c
>>> @@ -16,6 +16,7 @@
>>>  #include "qemu/osdep.h"
>>>  #include "qemu/uuid.h"
>>>  #include "qemu/bswap.h"
>>> +#include "qemu/xxhash.h"
>>>
>>>  void qemu_uuid_generate(QemuUUID *uuid)
>>>  {
>>> @@ -116,3 +117,40 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid)
>>>      bswap16s(&uuid.fields.time_high_and_version);
>>>      return uuid;
>>>  }
>>> +
>>> +uint32_t qemu_uuid_hash(const void *uuid)
>>> +{
>>> +    QemuUUID *id = (QemuUUID *) uuid;
>>> +    uint64_t ab = (id->fields.time_low) |
>>> +                  (((uint64_t) id->fields.time_mid) << 32) |
>>> +                  (((uint64_t) id->fields.time_high_and_version) << 48);
>>> +    uint64_t cd = (id->fields.clock_seq_and_reserved) |
>>> +                  (id->fields.clock_seq_low << 8);
>>> +    int i = 0, shift = 8;
>>> +
>>> +    for (; i < 6; i++) {
>>> +        shift += 8;
>>> +        cd |= ((uint64_t) id->fields.node[i]) << shift;
>>> +    }
>>> +
>>> +    return qemu_xxhash4(ab, cd);
>>> +
>>>
>>
>> That looks quite complex, and I have no idea if this is a good hash or
>> not.
>>
>> Instead I would implement the traditional "djb" hash over the char[16]
>> data (see g_str_hash implementation for \0-terminated implementation)
>>
>
> ok, I'll try to do something like that. Thanks for the suggestion.
>
> I looked for any hash library within qemu code and xxhash was one of the
> options that seemed easier to use.
>
>
>>
>>
>>> }
>>> +
>>> +int qemu_uuid_equal(const void *lhv, const void *rhv)
>>> +{
>>> +    int i;
>>> +    QemuUUID *lid = (QemuUUID *) lhv;
>>> +    QemuUUID *rid = (QemuUUID *) rhv;
>>> +    if (lid == NULL || rid == NULL) {
>>> +        return 0;
>>> +    }
>>> +    if (lid == rid) {
>>> +        return 1;
>>> +    }
>>> +    for (i = 0; i < 16; i++) {
>>> +        if (lid->data[i] != rid->data[i]) {
>>> +            return 0;
>>> +        }
>>> +    }
>>> +    return 1;
>>> +}
>>> --
>>> 2.40.0
>>>
>>>
>>
>> --
>> Marc-André Lureau
>>
>
Marc-André Lureau May 22, 2023, 1:42 p.m. UTC | #4
Hi

On Mon, May 22, 2023 at 5:41 PM Albert Esteve <aesteve@redhat.com> wrote:

>
>
>
> On Mon, May 22, 2023 at 3:13 PM Albert Esteve <aesteve@redhat.com> wrote:
>
>>
>>
>>
>> On Sat, May 20, 2023 at 9:36 AM Marc-André Lureau <
>> marcandre.lureau@gmail.com> wrote:
>>
>>> Hi
>>>
>>> On Thu, May 18, 2023 at 4:03 PM Albert Esteve <aesteve@redhat.com>
>>> wrote:
>>>
>>>> Add hash and an equal function to uuid module.
>>>>
>>>> Add a couple simple unit tests for new functions,
>>>> checking collisions for similar UUIDs in the case
>>>> of the hash function, and comparing generated UUIDs
>>>> for the equal function.
>>>>
>>>> Signed-off-by: Albert Esteve <aesteve@redhat.com>
>>>> ---
>>>>  include/qemu/uuid.h    |  4 ++++
>>>>  tests/unit/test-uuid.c | 46 ++++++++++++++++++++++++++++++++++++++++++
>>>>  util/uuid.c            | 38 ++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 88 insertions(+)
>>>>
>>>> diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
>>>> index dc40ee1fc9..136df682c9 100644
>>>> --- a/include/qemu/uuid.h
>>>> +++ b/include/qemu/uuid.h
>>>> @@ -96,4 +96,8 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid);
>>>>
>>>>  QemuUUID qemu_uuid_bswap(QemuUUID uuid);
>>>>
>>>> +uint32_t qemu_uuid_hash(const void *uuid);
>>>> +
>>>> +int qemu_uuid_equal(const void *lhv, const void *rhv);
>>>>
>>>
>>> There is already qemu_uuid_is_equal()
>>>
>>>
>>
>> Agh, true. I'll remove it. Not sure why my brain ignored it as I was
>> reading the code...
>>
>
> One thing to consider here is that the function signature, if we want to
> pass it as a parameter for g_hash_table_new,
> expects void pointers whereas qemu_uuid_is_equal() takes QemuUUID pointers.
>
> How would you suggest proceeding here? Would be better to "overload" (or
> wrap) a call to qemu_uuid_equal() from
> another function that matches the expected signature?  (int
> qemu_uuid_is_equal(void*, void*) is not a good name in that case,
> it should be something that highlights the difference between the two in
> the name) Or should I change the current existing
> function signature?
>

I would wrap the call, next to g_hash_table_new(). Alternatively, just cast
the function.

>
>
>>
>>> +
>>>>  #endif
>>>> diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
>>>> index c111de5fc1..8c865869d5 100644
>>>> --- a/tests/unit/test-uuid.c
>>>> +++ b/tests/unit/test-uuid.c
>>>> @@ -171,6 +171,50 @@ static void test_uuid_unparse_strdup(void)
>>>>      }
>>>>  }
>>>>
>>>> +static void test_uuid_hash(void)
>>>> +{
>>>> +    QemuUUID uuid;
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < 100; i++) {
>>>> +        qemu_uuid_generate(&uuid);
>>>> +        /* Obtain the UUID hash */
>>>> +        uint32_t hash_a = qemu_uuid_hash(&uuid);
>>>> +        int data_idx = g_random_int_range(0, 15);
>>>> +        /* Change a single random byte of the UUID */
>>>> +        if (uuid.data[data_idx] < 0xFF) {
>>>> +            uuid.data[data_idx]++;
>>>> +        } else {
>>>> +            uuid.data[data_idx]--;
>>>> +        }
>>>> +        /* Obtain the UUID hash again */
>>>> +        uint32_t hash_b = qemu_uuid_hash(&uuid);
>>>> +        /*
>>>> +         * Both hashes shall be different (avoid collision)
>>>> +         * for any change in the UUID fields
>>>> +         */
>>>> +        g_assert_cmpint(hash_a, !=, hash_b);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void test_uuid_equal(void)
>>>> +{
>>>> +    QemuUUID uuid_a, uuid_b, uuid_c;
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < 100; i++) {
>>>> +        qemu_uuid_generate(&uuid_a);
>>>> +        qemu_uuid_generate(&uuid_b);
>>>> +        memcpy(&uuid_c, &uuid_a, sizeof(uuid_a));
>>>> +
>>>> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_a));
>>>> +        g_assert(qemu_uuid_equal(&uuid_b, &uuid_b));
>>>> +        g_assert(qemu_uuid_equal(&uuid_a, &uuid_c));
>>>> +        g_assert_false(qemu_uuid_equal(&uuid_a, &uuid_b));
>>>> +        g_assert_false(qemu_uuid_equal(NULL, NULL));
>>>> +    }
>>>> +}
>>>> +
>>>>  int main(int argc, char **argv)
>>>>  {
>>>>      g_test_init(&argc, &argv, NULL);
>>>> @@ -179,6 +223,8 @@ int main(int argc, char **argv)
>>>>      g_test_add_func("/uuid/parse", test_uuid_parse);
>>>>      g_test_add_func("/uuid/unparse", test_uuid_unparse);
>>>>      g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup);
>>>> +    g_test_add_func("/uuid/hash", test_uuid_hash);
>>>> +    g_test_add_func("/uuid/equal", test_uuid_equal);
>>>>
>>>>      return g_test_run();
>>>>  }
>>>> diff --git a/util/uuid.c b/util/uuid.c
>>>> index b1108dde78..efa9b0a0e4 100644
>>>> --- a/util/uuid.c
>>>> +++ b/util/uuid.c
>>>> @@ -16,6 +16,7 @@
>>>>  #include "qemu/osdep.h"
>>>>  #include "qemu/uuid.h"
>>>>  #include "qemu/bswap.h"
>>>> +#include "qemu/xxhash.h"
>>>>
>>>>  void qemu_uuid_generate(QemuUUID *uuid)
>>>>  {
>>>> @@ -116,3 +117,40 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid)
>>>>      bswap16s(&uuid.fields.time_high_and_version);
>>>>      return uuid;
>>>>  }
>>>> +
>>>> +uint32_t qemu_uuid_hash(const void *uuid)
>>>> +{
>>>> +    QemuUUID *id = (QemuUUID *) uuid;
>>>> +    uint64_t ab = (id->fields.time_low) |
>>>> +                  (((uint64_t) id->fields.time_mid) << 32) |
>>>> +                  (((uint64_t) id->fields.time_high_and_version) <<
>>>> 48);
>>>> +    uint64_t cd = (id->fields.clock_seq_and_reserved) |
>>>> +                  (id->fields.clock_seq_low << 8);
>>>> +    int i = 0, shift = 8;
>>>> +
>>>> +    for (; i < 6; i++) {
>>>> +        shift += 8;
>>>> +        cd |= ((uint64_t) id->fields.node[i]) << shift;
>>>> +    }
>>>> +
>>>> +    return qemu_xxhash4(ab, cd);
>>>> +
>>>>
>>>
>>> That looks quite complex, and I have no idea if this is a good hash or
>>> not.
>>>
>>> Instead I would implement the traditional "djb" hash over the char[16]
>>> data (see g_str_hash implementation for \0-terminated implementation)
>>>
>>
>> ok, I'll try to do something like that. Thanks for the suggestion.
>>
>> I looked for any hash library within qemu code and xxhash was one of the
>> options that seemed easier to use.
>>
>>
>>>
>>>
>>>> }
>>>> +
>>>> +int qemu_uuid_equal(const void *lhv, const void *rhv)
>>>> +{
>>>> +    int i;
>>>> +    QemuUUID *lid = (QemuUUID *) lhv;
>>>> +    QemuUUID *rid = (QemuUUID *) rhv;
>>>> +    if (lid == NULL || rid == NULL) {
>>>> +        return 0;
>>>> +    }
>>>> +    if (lid == rid) {
>>>> +        return 1;
>>>> +    }
>>>> +    for (i = 0; i < 16; i++) {
>>>> +        if (lid->data[i] != rid->data[i]) {
>>>> +            return 0;
>>>> +        }
>>>> +    }
>>>> +    return 1;
>>>> +}
>>>> --
>>>> 2.40.0
>>>>
>>>>
>>>
>>> --
>>> Marc-André Lureau
>>>
>>
diff mbox series

Patch

diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
index dc40ee1fc9..136df682c9 100644
--- a/include/qemu/uuid.h
+++ b/include/qemu/uuid.h
@@ -96,4 +96,8 @@  int qemu_uuid_parse(const char *str, QemuUUID *uuid);
 
 QemuUUID qemu_uuid_bswap(QemuUUID uuid);
 
+uint32_t qemu_uuid_hash(const void *uuid);
+
+int qemu_uuid_equal(const void *lhv, const void *rhv);
+
 #endif
diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
index c111de5fc1..8c865869d5 100644
--- a/tests/unit/test-uuid.c
+++ b/tests/unit/test-uuid.c
@@ -171,6 +171,50 @@  static void test_uuid_unparse_strdup(void)
     }
 }
 
+static void test_uuid_hash(void)
+{
+    QemuUUID uuid;
+    int i;
+
+    for (i = 0; i < 100; i++) {
+        qemu_uuid_generate(&uuid);
+        /* Obtain the UUID hash */
+        uint32_t hash_a = qemu_uuid_hash(&uuid);
+        int data_idx = g_random_int_range(0, 15);
+        /* Change a single random byte of the UUID */
+        if (uuid.data[data_idx] < 0xFF) {
+            uuid.data[data_idx]++;
+        } else {
+            uuid.data[data_idx]--;
+        }
+        /* Obtain the UUID hash again */
+        uint32_t hash_b = qemu_uuid_hash(&uuid);
+        /*
+         * Both hashes shall be different (avoid collision)
+         * for any change in the UUID fields
+         */
+        g_assert_cmpint(hash_a, !=, hash_b);
+    }
+}
+
+static void test_uuid_equal(void)
+{
+    QemuUUID uuid_a, uuid_b, uuid_c;
+    int i;
+
+    for (i = 0; i < 100; i++) {
+        qemu_uuid_generate(&uuid_a);
+        qemu_uuid_generate(&uuid_b);
+        memcpy(&uuid_c, &uuid_a, sizeof(uuid_a));
+
+        g_assert(qemu_uuid_equal(&uuid_a, &uuid_a));
+        g_assert(qemu_uuid_equal(&uuid_b, &uuid_b));
+        g_assert(qemu_uuid_equal(&uuid_a, &uuid_c));
+        g_assert_false(qemu_uuid_equal(&uuid_a, &uuid_b));
+        g_assert_false(qemu_uuid_equal(NULL, NULL));
+    }
+}
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -179,6 +223,8 @@  int main(int argc, char **argv)
     g_test_add_func("/uuid/parse", test_uuid_parse);
     g_test_add_func("/uuid/unparse", test_uuid_unparse);
     g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup);
+    g_test_add_func("/uuid/hash", test_uuid_hash);
+    g_test_add_func("/uuid/equal", test_uuid_equal);
 
     return g_test_run();
 }
diff --git a/util/uuid.c b/util/uuid.c
index b1108dde78..efa9b0a0e4 100644
--- a/util/uuid.c
+++ b/util/uuid.c
@@ -16,6 +16,7 @@ 
 #include "qemu/osdep.h"
 #include "qemu/uuid.h"
 #include "qemu/bswap.h"
+#include "qemu/xxhash.h"
 
 void qemu_uuid_generate(QemuUUID *uuid)
 {
@@ -116,3 +117,40 @@  QemuUUID qemu_uuid_bswap(QemuUUID uuid)
     bswap16s(&uuid.fields.time_high_and_version);
     return uuid;
 }
+
+uint32_t qemu_uuid_hash(const void *uuid)
+{
+    QemuUUID *id = (QemuUUID *) uuid;
+    uint64_t ab = (id->fields.time_low) |
+                  (((uint64_t) id->fields.time_mid) << 32) |
+                  (((uint64_t) id->fields.time_high_and_version) << 48);
+    uint64_t cd = (id->fields.clock_seq_and_reserved) |
+                  (id->fields.clock_seq_low << 8);
+    int i = 0, shift = 8;
+
+    for (; i < 6; i++) {
+        shift += 8;
+        cd |= ((uint64_t) id->fields.node[i]) << shift;
+    }
+
+    return qemu_xxhash4(ab, cd);
+}
+
+int qemu_uuid_equal(const void *lhv, const void *rhv)
+{
+    int i;
+    QemuUUID *lid = (QemuUUID *) lhv;
+    QemuUUID *rid = (QemuUUID *) rhv;
+    if (lid == NULL || rid == NULL) {
+        return 0;
+    }
+    if (lid == rid) {
+        return 1;
+    }
+    for (i = 0; i < 16; i++) {
+        if (lid->data[i] != rid->data[i]) {
+            return 0;
+        }
+    }
+    return 1;
+}