diff mbox series

[kvm-unit-tests,v2,1/4] lib/string: Add strnlen, strrchr and strtoul

Message ID 20210318180727.116004-2-nikos.nikoleris@arm.com (mailing list archive)
State New, archived
Headers show
Series Fix the devicetree parser for stdout-path | expand

Commit Message

Nikos Nikoleris March 18, 2021, 6:07 p.m. UTC
This change adds two functions from <string.h> and one from <stdlib.h>
in preparation for an update in libfdt.

Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
---
 lib/stdlib.h | 12 +++++++++
 lib/string.h |  4 ++-
 lib/string.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 77 insertions(+), 9 deletions(-)
 create mode 100644 lib/stdlib.h

Comments

Andrew Jones March 22, 2021, 8:35 a.m. UTC | #1
On Thu, Mar 18, 2021 at 06:07:24PM +0000, Nikos Nikoleris wrote:
> This change adds two functions from <string.h> and one from <stdlib.h>
> in preparation for an update in libfdt.
> 
> Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
> ---
>  lib/stdlib.h | 12 +++++++++
>  lib/string.h |  4 ++-
>  lib/string.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++------
>  3 files changed, 77 insertions(+), 9 deletions(-)
>  create mode 100644 lib/stdlib.h
> 
> diff --git a/lib/stdlib.h b/lib/stdlib.h
> new file mode 100644
> index 0000000..e8abe75
> --- /dev/null
> +++ b/lib/stdlib.h
> @@ -0,0 +1,12 @@
> +/*
> + * Header for libc stdlib functions
> + *
> + * This code is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU Library General Public License version 2.
> + */
> +#ifndef _STDLIB_H_
> +#define _STDLIB_H_
> +
> +unsigned long int strtoul(const char *nptr, char **endptr, int base);
> +
> +#endif /* _STDLIB_H_ */
> diff --git a/lib/string.h b/lib/string.h
> index 493d51b..8da687e 100644
> --- a/lib/string.h
> +++ b/lib/string.h
> @@ -7,12 +7,14 @@
>  #ifndef __STRING_H
>  #define __STRING_H
>  
> -extern unsigned long strlen(const char *buf);
> +extern size_t strlen(const char *buf);
> +extern size_t strnlen(const char *buf, size_t maxlen);
>  extern char *strcat(char *dest, const char *src);
>  extern char *strcpy(char *dest, const char *src);
>  extern int strcmp(const char *a, const char *b);
>  extern int strncmp(const char *a, const char *b, size_t n);
>  extern char *strchr(const char *s, int c);
> +extern char *strrchr(const char *s, int c);
>  extern char *strstr(const char *haystack, const char *needle);
>  extern void *memset(void *s, int c, size_t n);
>  extern void *memcpy(void *dest, const void *src, size_t n);
> diff --git a/lib/string.c b/lib/string.c
> index 75257f5..f77881f 100644
> --- a/lib/string.c
> +++ b/lib/string.c
> @@ -6,16 +6,26 @@
>   */
>  
>  #include "libcflat.h"
> +#include "stdlib.h"
>  
> -unsigned long strlen(const char *buf)
> +size_t strlen(const char *buf)
>  {
> -    unsigned long len = 0;
> +    size_t len = 0;
>  
>      while (*buf++)
>  	++len;
>      return len;
>  }
>  
> +size_t strnlen(const char *buf, size_t maxlen)
> +{
> +    const char *sc;
> +
> +    for (sc = buf; maxlen-- && *sc != '\0'; ++sc)
> +        /* nothing */;
> +    return sc - buf;
> +}
> +
>  char *strcat(char *dest, const char *src)
>  {
>      char *p = dest;
> @@ -55,6 +65,16 @@ char *strchr(const char *s, int c)
>      return (char *)s;
>  }
>  
> +char *strrchr(const char *s, int c)
> +{
> +    const char *last = NULL;
> +    do {
> +        if (*s == (char)c)
> +            last = s;
> +    } while (*s++);
> +    return (char *)last;
> +}
> +
>  char *strstr(const char *s1, const char *s2)
>  {
>      size_t l1, l2;
> @@ -135,13 +155,21 @@ void *memchr(const void *s, int c, size_t n)
>      return NULL;
>  }
>  
> -long atol(const char *ptr)
> +static int isspace(int c)
> +{
> +    return c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r';

I added \v. We don't need to do it for this patch, but at some point we
should consider adding a ctype.h file and consolidating all these is*
functions. There's a few in lib/argv.c too.

> +}
> +
> +unsigned long int strtoul(const char *nptr, char **endptr, int base)
>  {
>      long acc = 0;
> -    const char *s = ptr;
> +    const char *s = nptr;
>      int neg, c;
>  
> -    while (*s == ' ' || *s == '\t')
> +    if (base < 0 || base == 1 || base > 32)
> +        goto out; // errno = EINVAL

I changed this to

 assert(base == 0 || (base >= 2 && base <= 36));

Any reason why you weren't allowing bases 33 - 36?

> +
> +    while (isspace(*s))
>          s++;
>      if (*s == '-'){
>          neg = 1;
> @@ -152,20 +180,46 @@ long atol(const char *ptr)
>              s++;
>      }
>  
> +    if (base == 0 || base == 16) {
> +        if (*s == '0') {
> +            s++;
> +            if (*s == 'x') {

I changed this to (*s == 'x' || *s == 'X')

> +                 s++;
> +                 base = 16;
> +            } else if (base == 0)
> +                 base = 8;
> +        } else if (base == 0)
> +            base = 10;
> +    }
> +
>      while (*s) {
> -        if (*s < '0' || *s > '9')
> +        if (*s >= '0' && *s < '0' + base && *s <= '9')
> +            c = *s - '0';
> +        else if (*s >= 'a' && *s < 'a' + base - 10)
> +            c = *s - 'a' + 10;
> +        else if (*s >= 'A' && *s < 'A' + base - 10)
> +            c = *s - 'A' + 10;
> +        else
>              break;
> -        c = *s - '0';
> -        acc = acc * 10 + c;
> +        acc = acc * base + c;

I changed this to catch overflow.

>          s++;
>      }
>  
>      if (neg)
>          acc = -acc;
>  
> + out:
> +    if (endptr)
> +        *endptr = (char *)s;
> +
>      return acc;
>  }
>  
> +long atol(const char *ptr)
> +{
> +    return strtoul(ptr, NULL, 10);

Since atol should be strtol, I went ahead and also added strtol.

> +}
> +
>  extern char **environ;
>  
>  char *getenv(const char *name)
> -- 
> 2.25.1
> 

Here's a diff of my changes on top of your patch


diff --git a/lib/string.c b/lib/string.c
index 30592c5603c5..b684271bb18f 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -164,21 +164,22 @@ void *memchr(const void *s, int c, size_t n)
 
 static int isspace(int c)
 {
-    return c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r';
+    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
 }
 
-unsigned long int strtoul(const char *nptr, char **endptr, int base)
-{
-    long acc = 0;
+static unsigned long __strtol(const char *nptr, char **endptr,
+                              int base, bool is_signed) {
+    unsigned long acc = 0;
     const char *s = nptr;
+    bool overflow;
     int neg, c;
 
-    if (base < 0 || base == 1 || base > 32)
-        goto out; // errno = EINVAL
+    assert(base == 0 || (base >= 2 && base <= 36));
 
     while (isspace(*s))
         s++;
-    if (*s == '-'){
+
+    if (*s == '-') {
         neg = 1;
         s++;
     } else {
@@ -190,7 +191,7 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
     if (base == 0 || base == 16) {
         if (*s == '0') {
             s++;
-            if (*s == 'x') {
+            if (*s == 'x' || *s == 'X') {
                  s++;
                  base = 16;
             } else if (base == 0)
@@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
             c = *s - 'A' + 10;
         else
             break;
-        acc = acc * base + c;
+
+        if (is_signed) {
+            long __acc = (long)acc;
+            overflow = __builtin_smull_overflow(__acc, base, &__acc);
+            assert(!overflow);
+            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
+            assert(!overflow);
+            acc = (unsigned long)__acc;
+        } else {
+            overflow = __builtin_umull_overflow(acc, base, &acc);
+            assert(!overflow);
+            overflow = __builtin_uaddl_overflow(acc, c, &acc);
+            assert(!overflow);
+        }
+
         s++;
     }
 
     if (neg)
         acc = -acc;
 
- out:
     if (endptr)
         *endptr = (char *)s;
 
     return acc;
 }
 
+long int strtol(const char *nptr, char **endptr, int base)
+{
+    return __strtol(nptr, endptr, base, true);
+}
+
+unsigned long int strtoul(const char *nptr, char **endptr, int base)
+{
+    return __strtol(nptr, endptr, base, false);
+}
+
 long atol(const char *ptr)
 {
-    return strtoul(ptr, NULL, 10);
+    return strtol(ptr, NULL, 10);
 }
 
 extern char **environ;


Thanks,
drew
Nikos Nikoleris March 22, 2021, 9:52 a.m. UTC | #2
Hi Drew,

On 22/03/2021 08:35, Andrew Jones wrote:
> On Thu, Mar 18, 2021 at 06:07:24PM +0000, Nikos Nikoleris wrote:
>> This change adds two functions from <string.h> and one from <stdlib.h>
>> in preparation for an update in libfdt.
>>
>> Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
>> ---
>>   lib/stdlib.h | 12 +++++++++
>>   lib/string.h |  4 ++-
>>   lib/string.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++------
>>   3 files changed, 77 insertions(+), 9 deletions(-)
>>   create mode 100644 lib/stdlib.h
>>
>> diff --git a/lib/stdlib.h b/lib/stdlib.h
>> new file mode 100644
>> index 0000000..e8abe75
>> --- /dev/null
>> +++ b/lib/stdlib.h
>> @@ -0,0 +1,12 @@
>> +/*
>> + * Header for libc stdlib functions
>> + *
>> + * This code is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU Library General Public License version 2.
>> + */
>> +#ifndef _STDLIB_H_
>> +#define _STDLIB_H_
>> +
>> +unsigned long int strtoul(const char *nptr, char **endptr, int base);
>> +
>> +#endif /* _STDLIB_H_ */
>> diff --git a/lib/string.h b/lib/string.h
>> index 493d51b..8da687e 100644
>> --- a/lib/string.h
>> +++ b/lib/string.h
>> @@ -7,12 +7,14 @@
>>   #ifndef __STRING_H
>>   #define __STRING_H
>>   
>> -extern unsigned long strlen(const char *buf);
>> +extern size_t strlen(const char *buf);
>> +extern size_t strnlen(const char *buf, size_t maxlen);
>>   extern char *strcat(char *dest, const char *src);
>>   extern char *strcpy(char *dest, const char *src);
>>   extern int strcmp(const char *a, const char *b);
>>   extern int strncmp(const char *a, const char *b, size_t n);
>>   extern char *strchr(const char *s, int c);
>> +extern char *strrchr(const char *s, int c);
>>   extern char *strstr(const char *haystack, const char *needle);
>>   extern void *memset(void *s, int c, size_t n);
>>   extern void *memcpy(void *dest, const void *src, size_t n);
>> diff --git a/lib/string.c b/lib/string.c
>> index 75257f5..f77881f 100644
>> --- a/lib/string.c
>> +++ b/lib/string.c
>> @@ -6,16 +6,26 @@
>>    */
>>   
>>   #include "libcflat.h"
>> +#include "stdlib.h"
>>   
>> -unsigned long strlen(const char *buf)
>> +size_t strlen(const char *buf)
>>   {
>> -    unsigned long len = 0;
>> +    size_t len = 0;
>>   
>>       while (*buf++)
>>   	++len;
>>       return len;
>>   }
>>   
>> +size_t strnlen(const char *buf, size_t maxlen)
>> +{
>> +    const char *sc;
>> +
>> +    for (sc = buf; maxlen-- && *sc != '\0'; ++sc)
>> +        /* nothing */;
>> +    return sc - buf;
>> +}
>> +
>>   char *strcat(char *dest, const char *src)
>>   {
>>       char *p = dest;
>> @@ -55,6 +65,16 @@ char *strchr(const char *s, int c)
>>       return (char *)s;
>>   }
>>   
>> +char *strrchr(const char *s, int c)
>> +{
>> +    const char *last = NULL;
>> +    do {
>> +        if (*s == (char)c)
>> +            last = s;
>> +    } while (*s++);
>> +    return (char *)last;
>> +}
>> +
>>   char *strstr(const char *s1, const char *s2)
>>   {
>>       size_t l1, l2;
>> @@ -135,13 +155,21 @@ void *memchr(const void *s, int c, size_t n)
>>       return NULL;
>>   }
>>   
>> -long atol(const char *ptr)
>> +static int isspace(int c)
>> +{
>> +    return c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r';
> 
> I added \v. We don't need to do it for this patch, but at some point we
> should consider adding a ctype.h file and consolidating all these is*
> functions. There's a few in lib/argv.c too.
> 

I agree.

>> +}
>> +
>> +unsigned long int strtoul(const char *nptr, char **endptr, int base)
>>   {
>>       long acc = 0;
>> -    const char *s = ptr;
>> +    const char *s = nptr;
>>       int neg, c;
>>   
>> -    while (*s == ' ' || *s == '\t')
>> +    if (base < 0 || base == 1 || base > 32)
>> +        goto out; // errno = EINVAL
> 
> I changed this to
> 
>   assert(base == 0 || (base >= 2 && base <= 36));
> 
> Any reason why you weren't allowing bases 33 - 36?
> 

I was going through the manpage for strtoul and I got confused. 36 is 
the right value.

I wasn't sure if we should assert, the manpage seems to imply that it 
will return without converting and set the errno and endptr. I guess it 
might be better to assert().

>> +
>> +    while (isspace(*s))
>>           s++;
>>       if (*s == '-'){
>>           neg = 1;
>> @@ -152,20 +180,46 @@ long atol(const char *ptr)
>>               s++;
>>       }
>>   
>> +    if (base == 0 || base == 16) {
>> +        if (*s == '0') {
>> +            s++;
>> +            if (*s == 'x') {
> 
> I changed this to (*s == 'x' || *s == 'X')
> 

Here my intent was to not parse 0X as a valid prefix for base 16, 0X is 
not in the manpage.

>> +                 s++;
>> +                 base = 16;
>> +            } else if (base == 0)
>> +                 base = 8;
>> +        } else if (base == 0)
>> +            base = 10;
>> +    }
>> +
>>       while (*s) {
>> -        if (*s < '0' || *s > '9')
>> +        if (*s >= '0' && *s < '0' + base && *s <= '9')
>> +            c = *s - '0';
>> +        else if (*s >= 'a' && *s < 'a' + base - 10)
>> +            c = *s - 'a' + 10;
>> +        else if (*s >= 'A' && *s < 'A' + base - 10)
>> +            c = *s - 'A' + 10;
>> +        else
>>               break;
>> -        c = *s - '0';
>> -        acc = acc * 10 + c;
>> +        acc = acc * base + c;
> 
> I changed this to catch overflow.
> 

Thanks! Some thoughts on the assertion.

>>           s++;
>>       }
>>   
>>       if (neg)
>>           acc = -acc;
>>   
>> + out:
>> +    if (endptr)
>> +        *endptr = (char *)s;
>> +
>>       return acc;
>>   }
>>   
>> +long atol(const char *ptr)
>> +{
>> +    return strtoul(ptr, NULL, 10);
> 
> Since atol should be strtol, I went ahead and also added strtol.
>

Not very important but we could also add it to stdlib.h?


Thanks for the fixes it looks much better now!

Nikos


>> +}
>> +
>>   extern char **environ;
>>   
>>   char *getenv(const char *name)
>> -- 
>> 2.25.1
>>
> 
> Here's a diff of my changes on top of your patch
> 
> 
> diff --git a/lib/string.c b/lib/string.c
> index 30592c5603c5..b684271bb18f 100644
> --- a/lib/string.c
> +++ b/lib/string.c
> @@ -164,21 +164,22 @@ void *memchr(const void *s, int c, size_t n)
>   
>   static int isspace(int c)
>   {
> -    return c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r';
> +    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
>   }
>   
> -unsigned long int strtoul(const char *nptr, char **endptr, int base)
> -{
> -    long acc = 0;
> +static unsigned long __strtol(const char *nptr, char **endptr,
> +                              int base, bool is_signed) {
> +    unsigned long acc = 0;
>       const char *s = nptr;
> +    bool overflow;
>       int neg, c;
>   
> -    if (base < 0 || base == 1 || base > 32)
> -        goto out; // errno = EINVAL
> +    assert(base == 0 || (base >= 2 && base <= 36));
>   
>       while (isspace(*s))
>           s++;
> -    if (*s == '-'){
> +
> +    if (*s == '-') {
>           neg = 1;
>           s++;
>       } else {
> @@ -190,7 +191,7 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
>       if (base == 0 || base == 16) {
>           if (*s == '0') {
>               s++;
> -            if (*s == 'x') {
> +            if (*s == 'x' || *s == 'X') {
>                    s++;
>                    base = 16;
>               } else if (base == 0)
> @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
>               c = *s - 'A' + 10;
>           else
>               break;
> -        acc = acc * base + c;
> +
> +        if (is_signed) {
> +            long __acc = (long)acc;
> +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> +            assert(!overflow);
> +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
> +            assert(!overflow);
> +            acc = (unsigned long)__acc;
> +        } else {
> +            overflow = __builtin_umull_overflow(acc, base, &acc);
> +            assert(!overflow);
> +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
> +            assert(!overflow);
> +        }
> +
>           s++;
>       }
>   
>       if (neg)
>           acc = -acc;
>   
> - out:
>       if (endptr)
>           *endptr = (char *)s;
>   
>       return acc;
>   }
>   
> +long int strtol(const char *nptr, char **endptr, int base)
> +{
> +    return __strtol(nptr, endptr, base, true);
> +}
> +
> +unsigned long int strtoul(const char *nptr, char **endptr, int base)
> +{
> +    return __strtol(nptr, endptr, base, false);
> +}
> +
>   long atol(const char *ptr)
>   {
> -    return strtoul(ptr, NULL, 10);
> +    return strtol(ptr, NULL, 10);
>   }
>   
>   extern char **environ;
> 
> 
> Thanks,
> drew
>
Andrew Jones March 22, 2021, 10:09 a.m. UTC | #3
On Mon, Mar 22, 2021 at 09:52:43AM +0000, Nikos Nikoleris wrote:
> > > +unsigned long int strtoul(const char *nptr, char **endptr, int base)
> > >   {
> > >       long acc = 0;
> > > -    const char *s = ptr;
> > > +    const char *s = nptr;
> > >       int neg, c;
> > > -    while (*s == ' ' || *s == '\t')
> > > +    if (base < 0 || base == 1 || base > 32)
> > > +        goto out; // errno = EINVAL
> > 
> > I changed this to
> > 
> >   assert(base == 0 || (base >= 2 && base <= 36));
> > 
> > Any reason why you weren't allowing bases 33 - 36?
> > 
> 
> I was going through the manpage for strtoul and I got confused. 36 is the
> right value.
> 
> I wasn't sure if we should assert, the manpage seems to imply that it will
> return without converting and set the errno and endptr. I guess it might be
> better to assert().

Yeah, I think so for our little test framework. Anything that would result
in EINVAL means fix your test code. assert should help find those things
more quickly.

> 
> > > +
> > > +    while (isspace(*s))
> > >           s++;
> > >       if (*s == '-'){
> > >           neg = 1;
> > > @@ -152,20 +180,46 @@ long atol(const char *ptr)
> > >               s++;
> > >       }
> > > +    if (base == 0 || base == 16) {
> > > +        if (*s == '0') {
> > > +            s++;
> > > +            if (*s == 'x') {
> > 
> > I changed this to (*s == 'x' || *s == 'X')
> > 
> 
> Here my intent was to not parse 0X as a valid prefix for base 16, 0X is not
> in the manpage.

It's a manpage bug. strtol's manpage does specify it and libc's strtoul
allows it.

> > > +long atol(const char *ptr)
> > > +{
> > > +    return strtoul(ptr, NULL, 10);
> > 
> > Since atol should be strtol, I went ahead and also added strtol.
> > 
> 
> Not very important but we could also add it to stdlib.h?

Yeah, atol should go to stdlib, but I think that's another cleanup patch
we can do later. I'd actually like to kill libcflat and start using
standard headers for everything. At least for starters we could create
all the standard headers we need and move all the prototypes out of
libcflat but keep libcflat as a header full of #includes.

Thanks,
drew
Andrew Jones March 23, 2021, 12:14 p.m. UTC | #4
On Mon, Mar 22, 2021 at 09:35:23AM +0100, Andrew Jones wrote:
> @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
>              c = *s - 'A' + 10;
>          else
>              break;
> -        acc = acc * base + c;
> +
> +        if (is_signed) {
> +            long __acc = (long)acc;
> +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> +            assert(!overflow);
> +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
> +            assert(!overflow);
> +            acc = (unsigned long)__acc;
> +        } else {
> +            overflow = __builtin_umull_overflow(acc, base, &acc);
> +            assert(!overflow);
> +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
> +            assert(!overflow);
> +        }
> +

Unfortunately my use of these builtins isn't loved by older compilers,
like the one used by the build-centos7 pipeline in our gitlab CI. I
could wrap them in an #if GCC_VERSION >= 50100 and just have the old
'acc = acc * base + c' as the fallback, but that's not pretty and
would also mean that clang would use the fallback too. Maybe we can
try and make our compiler.h more fancy in order to provide a
COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW define like linux does for
both gcc and clang. Or, we could just forgot the overflow checking.

Anybody else have suggestions? Paolo? Thomas?

Thanks,
drew
Andre Przywara March 23, 2021, 1 p.m. UTC | #5
On Tue, 23 Mar 2021 13:14:15 +0100
Andrew Jones <drjones@redhat.com> wrote:

Hi,

> On Mon, Mar 22, 2021 at 09:35:23AM +0100, Andrew Jones wrote:
> > @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
> >              c = *s - 'A' + 10;
> >          else
> >              break;
> > -        acc = acc * base + c;
> > +
> > +        if (is_signed) {
> > +            long __acc = (long)acc;
> > +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> > +            assert(!overflow);
> > +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
> > +            assert(!overflow);
> > +            acc = (unsigned long)__acc;
> > +        } else {
> > +            overflow = __builtin_umull_overflow(acc, base, &acc);
> > +            assert(!overflow);
> > +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
> > +            assert(!overflow);
> > +        }
> > +  
> 
> Unfortunately my use of these builtins isn't loved by older compilers,
> like the one used by the build-centos7 pipeline in our gitlab CI. I
> could wrap them in an #if GCC_VERSION >= 50100 and just have the old
> 'acc = acc * base + c' as the fallback, but that's not pretty and
> would also mean that clang would use the fallback too. Maybe we can
> try and make our compiler.h more fancy in order to provide a
> COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW define like linux does for
> both gcc and clang. Or, we could just forgot the overflow checking.

In line with my email from yesterday:
Before we go down the path of all evil (premature optimisation!), can't
we just copy
https://git.kernel.org/pub/scm/libs/klibc/klibc.git/tree/usr/klibc/strntoumax.c
and have a tested version that works everywhere? This is BSD/GPL dual
licensed, IIUC.
I don't really see the reason to performance optimise strtol in the
context of kvm-unit-tests.

Cheers,
Andre
Thomas Huth March 23, 2021, 1:01 p.m. UTC | #6
On 23/03/2021 13.14, Andrew Jones wrote:
> On Mon, Mar 22, 2021 at 09:35:23AM +0100, Andrew Jones wrote:
>> @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
>>               c = *s - 'A' + 10;
>>           else
>>               break;
>> -        acc = acc * base + c;
>> +
>> +        if (is_signed) {
>> +            long __acc = (long)acc;
>> +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
>> +            assert(!overflow);
>> +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
>> +            assert(!overflow);
>> +            acc = (unsigned long)__acc;
>> +        } else {
>> +            overflow = __builtin_umull_overflow(acc, base, &acc);
>> +            assert(!overflow);
>> +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
>> +            assert(!overflow);
>> +        }
>> +
> 
> Unfortunately my use of these builtins isn't loved by older compilers,
> like the one used by the build-centos7 pipeline in our gitlab CI. I
> could wrap them in an #if GCC_VERSION >= 50100 and just have the old
> 'acc = acc * base + c' as the fallback, but that's not pretty and
> would also mean that clang would use the fallback too. Maybe we can
> try and make our compiler.h more fancy in order to provide a
> COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW define like linux does for
> both gcc and clang. Or, we could just forgot the overflow checking.
> 
> Anybody else have suggestions? Paolo? Thomas?

What does a "normal" libc implementation do (e.g. glibc)? If it is also not 
doing overflow checking, I think we also don't need it in the kvm-unit-tests.

  Thomas
Andrew Jones March 23, 2021, 1:31 p.m. UTC | #7
On Tue, Mar 23, 2021 at 02:01:47PM +0100, Thomas Huth wrote:
> On 23/03/2021 13.14, Andrew Jones wrote:
> > On Mon, Mar 22, 2021 at 09:35:23AM +0100, Andrew Jones wrote:
> > > @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
> > >               c = *s - 'A' + 10;
> > >           else
> > >               break;
> > > -        acc = acc * base + c;
> > > +
> > > +        if (is_signed) {
> > > +            long __acc = (long)acc;
> > > +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> > > +            assert(!overflow);
> > > +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
> > > +            assert(!overflow);
> > > +            acc = (unsigned long)__acc;
> > > +        } else {
> > > +            overflow = __builtin_umull_overflow(acc, base, &acc);
> > > +            assert(!overflow);
> > > +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
> > > +            assert(!overflow);
> > > +        }
> > > +
> > 
> > Unfortunately my use of these builtins isn't loved by older compilers,
> > like the one used by the build-centos7 pipeline in our gitlab CI. I
> > could wrap them in an #if GCC_VERSION >= 50100 and just have the old
> > 'acc = acc * base + c' as the fallback, but that's not pretty and
> > would also mean that clang would use the fallback too. Maybe we can
> > try and make our compiler.h more fancy in order to provide a
> > COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW define like linux does for
> > both gcc and clang. Or, we could just forgot the overflow checking.
> > 
> > Anybody else have suggestions? Paolo? Thomas?
> 
> What does a "normal" libc implementation do (e.g. glibc)? If it is also not
> doing overflow checking, I think we also don't need it in the
> kvm-unit-tests.
>

You'll get LONG_MAX for strtol and ULONG_MAX for strtoul and errno will be
set to ERANGE.

Thanks,
drew
Andrew Jones March 23, 2021, 1:41 p.m. UTC | #8
On Tue, Mar 23, 2021 at 01:00:01PM +0000, Andre Przywara wrote:
> On Tue, 23 Mar 2021 13:14:15 +0100
> Andrew Jones <drjones@redhat.com> wrote:
> 
> Hi,
> 
> > On Mon, Mar 22, 2021 at 09:35:23AM +0100, Andrew Jones wrote:
> > > @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
> > >              c = *s - 'A' + 10;
> > >          else
> > >              break;
> > > -        acc = acc * base + c;
> > > +
> > > +        if (is_signed) {
> > > +            long __acc = (long)acc;
> > > +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> > > +            assert(!overflow);
> > > +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
> > > +            assert(!overflow);
> > > +            acc = (unsigned long)__acc;
> > > +        } else {
> > > +            overflow = __builtin_umull_overflow(acc, base, &acc);
> > > +            assert(!overflow);
> > > +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
> > > +            assert(!overflow);
> > > +        }
> > > +  
> > 
> > Unfortunately my use of these builtins isn't loved by older compilers,
> > like the one used by the build-centos7 pipeline in our gitlab CI. I
> > could wrap them in an #if GCC_VERSION >= 50100 and just have the old
> > 'acc = acc * base + c' as the fallback, but that's not pretty and
> > would also mean that clang would use the fallback too. Maybe we can
> > try and make our compiler.h more fancy in order to provide a
> > COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW define like linux does for
> > both gcc and clang. Or, we could just forgot the overflow checking.
> 
> In line with my email from yesterday:
> Before we go down the path of all evil (premature optimisation!), can't
> we just copy
> https://git.kernel.org/pub/scm/libs/klibc/klibc.git/tree/usr/klibc/strntoumax.c
> and have a tested version that works everywhere? This is BSD/GPL dual
> licensed, IIUC.
> I don't really see the reason to performance optimise strtol in the
> context of kvm-unit-tests.
>

Using the builtin isn't to optimize, it's to simplify. Checking for
overflow on multiplication is ugly business. As I said yesterday,
klibc doesn't do any error checking. We could choose to go that
way too, but I'd prefer we give a best effort to making the test
framework robust.

I quick pulled together the diff below. This gives us the overflow
checking when not using old compilers, but just falls back to the
simple math otherwise. Unless people have strong opinions about
that, then I'm inclined to go with it.

Thanks,
drew


diff --git a/lib/linux/compiler.h b/lib/linux/compiler.h
index 2d72f18c36e5..311da9807932 100644
--- a/lib/linux/compiler.h
+++ b/lib/linux/compiler.h
@@ -8,6 +8,20 @@
 
 #ifndef __ASSEMBLY__
 
+#define GCC_VERSION (__GNUC__ * 10000           \
+                    + __GNUC_MINOR__ * 100     \
+                    + __GNUC_PATCHLEVEL__)
+
+#ifdef __clang__
+#if __has_builtin(__builtin_mul_overflow) && \
+    __has_builtin(__builtin_add_overflow) && \
+    __has_builtin(__builtin_sub_overflow)
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
+#elif GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
+
 #include <stdint.h>
 
 #define barrier()      asm volatile("" : : : "memory")
diff --git a/lib/string.c b/lib/string.c
index b684271bb18f..e323908fe24e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -7,6 +7,7 @@
 
 #include "libcflat.h"
 #include "stdlib.h"
+#include "linux/compiler.h"
 
 size_t strlen(const char *buf)
 {
@@ -171,7 +172,6 @@ static unsigned long __strtol(const char *nptr, char **endptr,
                               int base, bool is_signed) {
     unsigned long acc = 0;
     const char *s = nptr;
-    bool overflow;
     int neg, c;
 
     assert(base == 0 || (base >= 2 && base <= 36));
@@ -210,19 +210,23 @@ static unsigned long __strtol(const char *nptr, char **endptr,
         else
             break;
 
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
         if (is_signed) {
             long __acc = (long)acc;
-            overflow = __builtin_smull_overflow(__acc, base, &__acc);
+            bool overflow = __builtin_smull_overflow(__acc, base, &__acc);
             assert(!overflow);
             overflow = __builtin_saddl_overflow(__acc, c, &__acc);
             assert(!overflow);
             acc = (unsigned long)__acc;
         } else {
-            overflow = __builtin_umull_overflow(acc, base, &acc);
+            bool overflow = __builtin_umull_overflow(acc, base, &acc);
             assert(!overflow);
             overflow = __builtin_uaddl_overflow(acc, c, &acc);
             assert(!overflow);
         }
+#else
+        acc = acc * base + c;
+#endif
 
         s++;
     }
Andre Przywara March 23, 2021, 4:11 p.m. UTC | #9
On Tue, 23 Mar 2021 14:41:21 +0100
Andrew Jones <drjones@redhat.com> wrote:

Hi,

> On Tue, Mar 23, 2021 at 01:00:01PM +0000, Andre Przywara wrote:
> > On Tue, 23 Mar 2021 13:14:15 +0100
> > Andrew Jones <drjones@redhat.com> wrote:
> > 
> > Hi,
> >   
> > > On Mon, Mar 22, 2021 at 09:35:23AM +0100, Andrew Jones wrote:  
> > > > @@ -208,23 +209,46 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
> > > >              c = *s - 'A' + 10;
> > > >          else
> > > >              break;
> > > > -        acc = acc * base + c;
> > > > +
> > > > +        if (is_signed) {
> > > > +            long __acc = (long)acc;
> > > > +            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> > > > +            assert(!overflow);
> > > > +            overflow = __builtin_saddl_overflow(__acc, c, &__acc);
> > > > +            assert(!overflow);
> > > > +            acc = (unsigned long)__acc;
> > > > +        } else {
> > > > +            overflow = __builtin_umull_overflow(acc, base, &acc);
> > > > +            assert(!overflow);
> > > > +            overflow = __builtin_uaddl_overflow(acc, c, &acc);
> > > > +            assert(!overflow);
> > > > +        }
> > > > +    
> > > 
> > > Unfortunately my use of these builtins isn't loved by older compilers,
> > > like the one used by the build-centos7 pipeline in our gitlab CI. I
> > > could wrap them in an #if GCC_VERSION >= 50100 and just have the old
> > > 'acc = acc * base + c' as the fallback, but that's not pretty and
> > > would also mean that clang would use the fallback too. Maybe we can
> > > try and make our compiler.h more fancy in order to provide a
> > > COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW define like linux does for
> > > both gcc and clang. Or, we could just forgot the overflow checking.  
> > 
> > In line with my email from yesterday:
> > Before we go down the path of all evil (premature optimisation!), can't
> > we just copy
> > https://git.kernel.org/pub/scm/libs/klibc/klibc.git/tree/usr/klibc/strntoumax.c
> > and have a tested version that works everywhere? This is BSD/GPL dual
> > licensed, IIUC.
> > I don't really see the reason to performance optimise strtol in the
> > context of kvm-unit-tests.
> >  
> 
> Using the builtin isn't to optimize, it's to simplify. Checking for
> overflow on multiplication is ugly business. As I said yesterday,
> klibc doesn't do any error checking.

Argh, sorry, I missed your reply yesterday in a bunch of other emails!

> We could choose to go that
> way too, but I'd prefer we give a best effort to making the test
> framework robust.

I agree, klibc was just some example, I didn't look too closely into
it. If it lacks, we should indeed not use it.

I just felt we are going through all the special cases of those
functions again, when people elsewhere checked all of them already. I
had some unpleasant experience with implementing a seemingly simple
memcpy() last year, with some surprising corner cases, so grew a bit
wary about re-implementing standard stuff and hoping it's all good.

Cheers,
Andre

> I quick pulled together the diff below. This gives us the overflow
> checking when not using old compilers, but just falls back to the
> simple math otherwise. Unless people have strong opinions about
> that, then I'm inclined to go with it.
> 
> Thanks,
> drew
> 
> 
> diff --git a/lib/linux/compiler.h b/lib/linux/compiler.h
> index 2d72f18c36e5..311da9807932 100644
> --- a/lib/linux/compiler.h
> +++ b/lib/linux/compiler.h
> @@ -8,6 +8,20 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +#define GCC_VERSION (__GNUC__ * 10000           \
> +                    + __GNUC_MINOR__ * 100     \
> +                    + __GNUC_PATCHLEVEL__)
> +
> +#ifdef __clang__
> +#if __has_builtin(__builtin_mul_overflow) && \
> +    __has_builtin(__builtin_add_overflow) && \
> +    __has_builtin(__builtin_sub_overflow)
> +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
> +#endif
> +#elif GCC_VERSION >= 50100
> +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
> +#endif
> +
>  #include <stdint.h>
>  
>  #define barrier()      asm volatile("" : : : "memory")
> diff --git a/lib/string.c b/lib/string.c
> index b684271bb18f..e323908fe24e 100644
> --- a/lib/string.c
> +++ b/lib/string.c
> @@ -7,6 +7,7 @@
>  
>  #include "libcflat.h"
>  #include "stdlib.h"
> +#include "linux/compiler.h"
>  
>  size_t strlen(const char *buf)
>  {
> @@ -171,7 +172,6 @@ static unsigned long __strtol(const char *nptr, char **endptr,
>                                int base, bool is_signed) {
>      unsigned long acc = 0;
>      const char *s = nptr;
> -    bool overflow;
>      int neg, c;
>  
>      assert(base == 0 || (base >= 2 && base <= 36));
> @@ -210,19 +210,23 @@ static unsigned long __strtol(const char *nptr, char **endptr,
>          else
>              break;
>  
> +#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
>          if (is_signed) {
>              long __acc = (long)acc;
> -            overflow = __builtin_smull_overflow(__acc, base, &__acc);
> +            bool overflow = __builtin_smull_overflow(__acc, base, &__acc);
>              assert(!overflow);
>              overflow = __builtin_saddl_overflow(__acc, c, &__acc);
>              assert(!overflow);
>              acc = (unsigned long)__acc;
>          } else {
> -            overflow = __builtin_umull_overflow(acc, base, &acc);
> +            bool overflow = __builtin_umull_overflow(acc, base, &acc);
>              assert(!overflow);
>              overflow = __builtin_uaddl_overflow(acc, c, &acc);
>              assert(!overflow);
>          }
> +#else
> +        acc = acc * base + c;
> +#endif
>  
>          s++;
>      }
>
diff mbox series

Patch

diff --git a/lib/stdlib.h b/lib/stdlib.h
new file mode 100644
index 0000000..e8abe75
--- /dev/null
+++ b/lib/stdlib.h
@@ -0,0 +1,12 @@ 
+/*
+ * Header for libc stdlib functions
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Library General Public License version 2.
+ */
+#ifndef _STDLIB_H_
+#define _STDLIB_H_
+
+unsigned long int strtoul(const char *nptr, char **endptr, int base);
+
+#endif /* _STDLIB_H_ */
diff --git a/lib/string.h b/lib/string.h
index 493d51b..8da687e 100644
--- a/lib/string.h
+++ b/lib/string.h
@@ -7,12 +7,14 @@ 
 #ifndef __STRING_H
 #define __STRING_H
 
-extern unsigned long strlen(const char *buf);
+extern size_t strlen(const char *buf);
+extern size_t strnlen(const char *buf, size_t maxlen);
 extern char *strcat(char *dest, const char *src);
 extern char *strcpy(char *dest, const char *src);
 extern int strcmp(const char *a, const char *b);
 extern int strncmp(const char *a, const char *b, size_t n);
 extern char *strchr(const char *s, int c);
+extern char *strrchr(const char *s, int c);
 extern char *strstr(const char *haystack, const char *needle);
 extern void *memset(void *s, int c, size_t n);
 extern void *memcpy(void *dest, const void *src, size_t n);
diff --git a/lib/string.c b/lib/string.c
index 75257f5..f77881f 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -6,16 +6,26 @@ 
  */
 
 #include "libcflat.h"
+#include "stdlib.h"
 
-unsigned long strlen(const char *buf)
+size_t strlen(const char *buf)
 {
-    unsigned long len = 0;
+    size_t len = 0;
 
     while (*buf++)
 	++len;
     return len;
 }
 
+size_t strnlen(const char *buf, size_t maxlen)
+{
+    const char *sc;
+
+    for (sc = buf; maxlen-- && *sc != '\0'; ++sc)
+        /* nothing */;
+    return sc - buf;
+}
+
 char *strcat(char *dest, const char *src)
 {
     char *p = dest;
@@ -55,6 +65,16 @@  char *strchr(const char *s, int c)
     return (char *)s;
 }
 
+char *strrchr(const char *s, int c)
+{
+    const char *last = NULL;
+    do {
+        if (*s == (char)c)
+            last = s;
+    } while (*s++);
+    return (char *)last;
+}
+
 char *strstr(const char *s1, const char *s2)
 {
     size_t l1, l2;
@@ -135,13 +155,21 @@  void *memchr(const void *s, int c, size_t n)
     return NULL;
 }
 
-long atol(const char *ptr)
+static int isspace(int c)
+{
+    return c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r';
+}
+
+unsigned long int strtoul(const char *nptr, char **endptr, int base)
 {
     long acc = 0;
-    const char *s = ptr;
+    const char *s = nptr;
     int neg, c;
 
-    while (*s == ' ' || *s == '\t')
+    if (base < 0 || base == 1 || base > 32)
+        goto out; // errno = EINVAL
+
+    while (isspace(*s))
         s++;
     if (*s == '-'){
         neg = 1;
@@ -152,20 +180,46 @@  long atol(const char *ptr)
             s++;
     }
 
+    if (base == 0 || base == 16) {
+        if (*s == '0') {
+            s++;
+            if (*s == 'x') {
+                 s++;
+                 base = 16;
+            } else if (base == 0)
+                 base = 8;
+        } else if (base == 0)
+            base = 10;
+    }
+
     while (*s) {
-        if (*s < '0' || *s > '9')
+        if (*s >= '0' && *s < '0' + base && *s <= '9')
+            c = *s - '0';
+        else if (*s >= 'a' && *s < 'a' + base - 10)
+            c = *s - 'a' + 10;
+        else if (*s >= 'A' && *s < 'A' + base - 10)
+            c = *s - 'A' + 10;
+        else
             break;
-        c = *s - '0';
-        acc = acc * 10 + c;
+        acc = acc * base + c;
         s++;
     }
 
     if (neg)
         acc = -acc;
 
+ out:
+    if (endptr)
+        *endptr = (char *)s;
+
     return acc;
 }
 
+long atol(const char *ptr)
+{
+    return strtoul(ptr, NULL, 10);
+}
+
 extern char **environ;
 
 char *getenv(const char *name)