Message ID | 20240731-nolibc-scanf-v1-1-f71bcc4abb9e@weissschuh.net (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | tools/nolibc: add support for [v]sscanf() | expand |
On 7/31/24 12:32, Thomas Weißschuh wrote: > The implementation is limited and only supports numeric arguments. I would like to see more information in here. Why is this needed etc. etc. > > Signed-off-by: Thomas Weißschuh <linux@weissschuh.net> > --- > tools/include/nolibc/stdio.h | 93 ++++++++++++++++++++++++++++ > tools/testing/selftests/nolibc/nolibc-test.c | 59 ++++++++++++++++++ > 2 files changed, 152 insertions(+) > > diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h > index c968dbbc4ef8..d63c45c06d8e 100644 > --- a/tools/include/nolibc/stdio.h > +++ b/tools/include/nolibc/stdio.h > @@ -348,6 +348,99 @@ int printf(const char *fmt, ...) > return ret; > } > > +static __attribute__((unused)) > +int vsscanf(const char *str, const char *format, va_list args) Is there a reason why you didn't use the same code in lib/vsprintf.c? You could simply duplicate the code here? With all these libc functionality added, it isn't nolibc looks like :) > +{ > + uintmax_t uval; > + intmax_t ival; > + int base; > + char *endptr; > + int matches; > + int lpref; > + > + matches = 0; > + > + while (1) { > + if (*format == '%') { > + lpref = 0; > + format++; > + > + if (*format == 'l') { > + lpref = 1; > + format++; > + if (*format == 'l') { > + lpref = 2; > + format++; > + } > + } > + > + if (*format == '%') { > + if ('%' != *str) > + goto done; > + str++; > + format++; > + continue; > + } else if (*format == 'd') { > + ival = strtoll(str, &endptr, 10); > + if (lpref == 0) > + *va_arg(args, int *) = ival; > + else if (lpref == 1) > + *va_arg(args, long *) = ival; > + else if (lpref == 2) > + *va_arg(args, long long *) = ival; > + } else if (*format == 'u' || *format == 'x' || *format == 'X') { > + base = *format == 'u' ? 
10 : 16; > + uval = strtoull(str, &endptr, base); > + if (lpref == 0) > + *va_arg(args, unsigned int *) = uval; > + else if (lpref == 1) > + *va_arg(args, unsigned long *) = uval; > + else if (lpref == 2) > + *va_arg(args, unsigned long long *) = uval; > + } else if (*format == 'p') { > + *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16); > + } else { > + SET_ERRNO(EILSEQ); > + goto done; > + } > + > + format++; > + str = endptr; > + matches++; > + > + } else if (*format == '\0') { > + goto done; > + } else if (isspace(*format)) { > + while (isspace(*format)) > + format++; > + while (isspace(*str)) > + str++; > + } else if (*format == *str) { > + format++; > + str++; > + } else { > + if (!matches) > + matches = EOF; > + goto done; > + } > + } > + > +done: > + return matches; > +} > + > +static __attribute__((unused, format(scanf, 2, 3))) > +int sscanf(const char *str, const char *format, ...) > +{ > + va_list args; > + int ret; > + > + va_start(args, format); > + ret = vsscanf(str, format, args); > + va_end(args); > + return ret; > +} > + > static __attribute__((unused)) > void perror(const char *msg) > { > diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c > index 093d0512f4c5..addbceb0b276 100644 > --- a/tools/testing/selftests/nolibc/nolibc-test.c > +++ b/tools/testing/selftests/nolibc/nolibc-test.c > @@ -1277,6 +1277,64 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm > return ret; > } > > +static int test_scanf(void) > +{ > + unsigned long long ull; > + unsigned long ul; > + unsigned int u; > + long long ll; > + long l; > + void *p; > + int i; > + > + if (sscanf("", "foo") != EOF) > + return 1; > + > + if (sscanf("foo", "foo") != 0) > + return 2; > + > + if (sscanf("123", "%d", &i) != 1) > + return 3; > + > + if (i != 123) > + return 4; > + > + if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3) > + return 5; > + > + if (i != 123) > + return 6; > + > + if 
(u != 456) > + return 7; > + > + if (p != (void *)0x90) > + return 8; > + > + if (sscanf("a b1", "a b%d", &i) != 1) > + return 9; > + > + if (i != 1) > + return 10; > + > + if (sscanf("a%1", "a%%%d", &i) != 1) > + return 11; > + > + if (i != 1) > + return 12; > + > + if (sscanf("1|2|3|4|5|6", > + "%d|%ld|%lld|%u|%lu|%llu", > + &i, &l, &ll, &u, &ul, &ull) != 6) > + return 13; > + > + if (i != 1 || l != 2 || ll != 3 || > + u != 4 || ul != 5 || ull != 6) > + return 14; > + > + return 0; Can we simplify this code? It is hard to read code with too many conditions. Maybe define an array of test conditions instead of a series of ifs. > +} > + > static int run_vfprintf(int min, int max) > { > int test; > @@ -1298,6 +1356,7 @@ static int run_vfprintf(int min, int max) > CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; > CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; > CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; > + CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; > case __LINE__: > return ret; /* must be last */ > /* note: do not set any defaults so as to permit holes above */ > thanks, -- Shuah
On 2024-07-31 17:01:09+0000, Shuah Khan wrote: > On 7/31/24 12:32, Thomas Weißschuh wrote: > > The implementation is limited and only supports numeric arguments. > > I would like to see more information in here. Why is this needed > etc. etc. Ack. > > > > Signed-off-by: Thomas Weißschuh <linux@weissschuh.net> > > --- > > tools/include/nolibc/stdio.h | 93 ++++++++++++++++++++++++++++ > > tools/testing/selftests/nolibc/nolibc-test.c | 59 ++++++++++++++++++ > > 2 files changed, 152 insertions(+) > > > > diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h > > index c968dbbc4ef8..d63c45c06d8e 100644 > > --- a/tools/include/nolibc/stdio.h > > +++ b/tools/include/nolibc/stdio.h > > @@ -348,6 +348,99 @@ int printf(const char *fmt, ...) > > return ret; > > } > > +static __attribute__((unused)) > > +int vsscanf(const char *str, const char *format, va_list args) > > Is there a reason why you didn't use the same code in lib/vsprintf.c? > You could simply duplicate the code here? lib/vsprintf.c is GPL-2.0-only while nolibc is LGPL-2.1 OR MIT, so code reuse isn't really possible. Furthermore I think the vsprintf.c implements the custom kernel formats, while nolibc should use posix ones. > With all these libc functionality added, it isn't nolibc looks like :) Well :-) The main motivation is to provide kselftests compatibility. Maybe Willy disagrees. 
> > +{ > > + uintmax_t uval; > > + intmax_t ival; > > + int base; > > + char *endptr; > > + int matches; > > + int lpref; > > + > > + matches = 0; > > + > > + while (1) { > > + if (*format == '%') { > > + lpref = 0; > > + format++; > > + > > + if (*format == 'l') { > > + lpref = 1; > > + format++; > > + if (*format == 'l') { > > + lpref = 2; > > + format++; > > + } > > + } > > + > > + if (*format == '%') { > > + if ('%' != *str) > > + goto done; > > + str++; > > + format++; > > + continue; > > + } else if (*format == 'd') { > > + ival = strtoll(str, &endptr, 10); > > + if (lpref == 0) > > + *va_arg(args, int *) = ival; > > + else if (lpref == 1) > > + *va_arg(args, long *) = ival; > > + else if (lpref == 2) > > + *va_arg(args, long long *) = ival; > > + } else if (*format == 'u' || *format == 'x' || *format == 'X') { > > + base = *format == 'u' ? 10 : 16; > > + uval = strtoull(str, &endptr, base); > > + if (lpref == 0) > > + *va_arg(args, unsigned int *) = uval; > > + else if (lpref == 1) > > + *va_arg(args, unsigned long *) = uval; > > + else if (lpref == 2) > > + *va_arg(args, unsigned long long *) = uval; > > + } else if (*format == 'p') { > > + *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16); > > + } else { > > + SET_ERRNO(EILSEQ); > > + goto done; > > + } > > + > > + format++; > > + str = endptr; > > + matches++; > > + > > + } else if (*format == '\0') { > > + goto done; > > + } else if (isspace(*format)) { > > + while (isspace(*format)) > > + format++; > > + while (isspace(*str)) > > + str++; > > + } else if (*format == *str) { > > + format++; > > + str++; > > + } else { > > + if (!matches) > > + matches = EOF; > > + goto done; > > + } > > + } > > + > > +done: > > + return matches; > > +} > > + > > +static __attribute__((unused, format(scanf, 2, 3))) > > +int sscanf(const char *str, const char *format, ...) 
> > +{ > > + va_list args; > > + int ret; > > + > > + va_start(args, format); > > + ret = vsscanf(str, format, args); > > + va_end(args); > > + return ret; > > +} > > + > > static __attribute__((unused)) > > void perror(const char *msg) > > { > > diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c > > index 093d0512f4c5..addbceb0b276 100644 > > --- a/tools/testing/selftests/nolibc/nolibc-test.c > > +++ b/tools/testing/selftests/nolibc/nolibc-test.c > > @@ -1277,6 +1277,64 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm > > return ret; > > } > > +static int test_scanf(void) > > +{ > > + unsigned long long ull; > > + unsigned long ul; > > + unsigned int u; > > + long long ll; > > + long l; > > + void *p; > > + int i; > > + > > + if (sscanf("", "foo") != EOF) > > + return 1; > > + > > + if (sscanf("foo", "foo") != 0) > > + return 2; > > + > > + if (sscanf("123", "%d", &i) != 1) > > + return 3; > > + > > + if (i != 123) > > + return 4; > > + > > + if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3) > > + return 5; > > + > > + if (i != 123) > > + return 6; > > + > > + if (u != 456) > > + return 7; > > + > > + if (p != (void *)0x90) > > + return 8; > > + > > + if (sscanf("a b1", "a b%d", &i) != 1) > > + return 9; > > + > > + if (i != 1) > > + return 10; > > + > > + if (sscanf("a%1", "a%%%d", &i) != 1) > > + return 11; > > + > > + if (i != 1) > > + return 12; > > + > > + if (sscanf("1|2|3|4|5|6", > > + "%d|%ld|%lld|%u|%lu|%llu", > > + &i, &l, &ll, &u, &ul, &ull) != 6) > > + return 13; > > + > > + if (i != 1 || l != 2 || ll != 3 || > > + u != 4 || ul != 5 || ull != 6) > > + return 14; > > + > > + return 0; > > Can we simplify this code? It is hard to read code with too > many conditions. Maybe defining an array test conditions > instead of a series ifs. I tried that and didn't find a way. Any pointers are welcome. 
> > + > > + > > static int run_vfprintf(int min, int max) > > { > > int test; > > @@ -1298,6 +1356,7 @@ static int run_vfprintf(int min, int max) > > CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; > > CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; > > CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; > > + CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; > > case __LINE__: > > return ret; /* must be last */ > > /* note: do not set any defaults so as to permit holes above */ > > > > thanks, > -- Shuah
On 8/2/24 09:48, Thomas Weißschuh wrote: > On 2024-07-31 17:01:09+0000, Shuah Khan wrote: >> On 7/31/24 12:32, Thomas Weißschuh wrote: >>> The implementation is limited and only supports numeric arguments. >> >> I would like to see more information in here. Why is this needed >> etc. etc. > > Ack. > >>> >>> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net> >>> --- >>> tools/include/nolibc/stdio.h | 93 ++++++++++++++++++++++++++++ >>> tools/testing/selftests/nolibc/nolibc-test.c | 59 ++++++++++++++++++ >>> 2 files changed, 152 insertions(+) >>> >>> diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h >>> index c968dbbc4ef8..d63c45c06d8e 100644 >>> --- a/tools/include/nolibc/stdio.h >>> +++ b/tools/include/nolibc/stdio.h >>> @@ -348,6 +348,99 @@ int printf(const char *fmt, ...) >>> return ret; >>> } >>> +static __attribute__((unused)) >>> +int vsscanf(const char *str, const char *format, va_list args) >> >> Is there a reason why you didn't use the same code in lib/vsprintf.c? >> You could simply duplicate the code here? > > lib/vsprintf.c is GPL-2.0-only while nolibc is LGPL-2.1 OR MIT, > so code reuse isn't really possible. > Furthermore I think the vsprintf.c implements the custom kernel formats, > while nolibc should use posix ones. Ack. > >> With all these libc functionality added, it isn't nolibc looks like :) > > Well :-) > > The main motivation is to provide kselftests compatibility. > Maybe Willy disagrees. > >>> +{ >>> +done: >>> + return matches; >>> +} >>> + >>> +static __attribute__((unused, format(scanf, 2, 3))) >>> +int sscanf(const char *str, const char *format, ...) 
>>> +{ >>> + va_list args; >>> + int ret; >>> + >>> + va_start(args, format); >>> + ret = vsscanf(str, format, args); >>> + va_end(args); >>> + return ret; >>> +} >>> + >>> static __attribute__((unused)) >>> void perror(const char *msg) >>> { >>> diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c >>> index 093d0512f4c5..addbceb0b276 100644 >>> --- a/tools/testing/selftests/nolibc/nolibc-test.c >>> +++ b/tools/testing/selftests/nolibc/nolibc-test.c >>> @@ -1277,6 +1277,64 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm >>> return ret; >>> } >>> +static int test_scanf(void) Is there a rationale for the return values 1 - 14? It would be easier to understand if there were comments in the code. >>> +{ >>> + unsigned long long ull; >>> + unsigned long ul; >>> + unsigned int u; >>> + long long ll; >>> + long l; >>> + void *p; >>> + int i; >>> + >>> + if (sscanf("", "foo") != EOF) >>> + return 1; >>> + >>> + if (sscanf("foo", "foo") != 0) >>> + return 2; >>> + >>> + if (sscanf("123", "%d", &i) != 1) >>> + return 3; >>> + >>> + if (i != 123) >>> + return 4; >>> + >>> + if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3) >>> + return 5; >>> + >>> + if (i != 123) >>> + return 6; >>> + >>> + if (u != 456) >>> + return 7; >>> + >>> + if (p != (void *)0x90) >>> + return 8; >>> + >>> + if (sscanf("a b1", "a b%d", &i) != 1) >>> + return 9; >>> + >>> + if (i != 1) >>> + return 10; >>> + >>> + if (sscanf("a%1", "a%%%d", &i) != 1) >>> + return 11; >>> + >>> + if (i != 1) >>> + return 12; >>> + >>> + if (sscanf("1|2|3|4|5|6", >>> + "%d|%ld|%lld|%u|%lu|%llu", >>> + &i, &l, &ll, &u, &ul, &ull) != 6) >>> + return 13; >>> + >>> + if (i != 1 || l != 2 || ll != 3 || >>> + u != 4 || ul != 5 || ull != 6) >>> + return 14; >>> + >>> + return 0; >> >> Can we simplify this code? It is hard to read code with too >> many conditions. Maybe defining an array test conditions >> instead of a series ifs. 
> > I tried that and didn't find a way. > Any pointers are welcome. I played with this some and couldn't think of a way to simplify this without making it hard to read. Adding comments would help, though. thanks, -- Shuah
On Fri, Aug 02, 2024 at 05:48:13PM +0200, Thomas Weißschuh wrote: > > With all these libc functionality added, it isn't nolibc looks like :) > > Well :-) > > The main motivation is to provide kselftests compatibility. > Maybe Willy disagrees. No, no, I'm perfectly fine with adding the functions that developers use or need to write their test or init tools. I don't have any strong opinion on scanf(). Just like strtok(), I stopped using it 25 years ago when I noticed that it never survives code evolutions, lacks a lot of flexibility and is often strongly tied to your types (more than printf where you can cast). But I perfectly understand that others are used to it and would appreciate having it, for example if it helps with command line arguments. > > > +static int test_scanf(void) > > > +{ > > > + unsigned long long ull; > > > + unsigned long ul; > > > + unsigned int u; > > > + long long ll; > > > + long l; > > > + void *p; > > > + int i; > > > + > > > + if (sscanf("", "foo") != EOF) > > > + return 1; > > > + > > > + if (sscanf("foo", "foo") != 0) > > > + return 2; > > > + > > > + if (sscanf("123", "%d", &i) != 1) > > > + return 3; > > > + > > > + if (i != 123) > > > + return 4; > > > + > > > + if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3) > > > + return 5; > > > + > > > + if (i != 123) > > > + return 6; > > > + > > > + if (u != 456) > > > + return 7; > > > + > > > + if (p != (void *)0x90) > > > + return 8; > > > + > > > + if (sscanf("a b1", "a b%d", &i) != 1) > > > + return 9; > > > + > > > + if (i != 1) > > > + return 10; > > > + > > > + if (sscanf("a%1", "a%%%d", &i) != 1) > > > + return 11; > > > + > > > + if (i != 1) > > > + return 12; > > > + > > > + if (sscanf("1|2|3|4|5|6", > > > + "%d|%ld|%lld|%u|%lu|%llu", > > > + &i, &l, &ll, &u, &ul, &ull) != 6) > > > + return 13; > > > + > > > + if (i != 1 || l != 2 || ll != 3 || > > > + u != 4 || ul != 5 || ull != 6) > > > + return 14; > > > + > > > + return 0; > > > > Can we simplify this code? 
It is hard to read code with too > > many conditions. Maybe defining an array test conditions > > instead of a series ifs. > > I tried that and didn't find a way. > Any pointers are welcome. I think it would be difficult due to the nature of varargs. However, since you grouped some expressions, maybe a one-liner comment between each scanf() to explain the intent of the test would make it easier to follow. E.g.: /* test multiple naked numbers */ ... /* test numbers delimited with a character */ ... /* test multiple integer types at once */ etc. This allows the reviewer to more easily re-focus on the test they were reading. Willy
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index c968dbbc4ef8..d63c45c06d8e 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -348,6 +348,99 @@ int printf(const char *fmt, ...) return ret; } +static __attribute__((unused)) +int vsscanf(const char *str, const char *format, va_list args) +{ + uintmax_t uval; + intmax_t ival; + int base; + char *endptr; + int matches; + int lpref; + + matches = 0; + + while (1) { + if (*format == '%') { + lpref = 0; + format++; + + if (*format == 'l') { + lpref = 1; + format++; + if (*format == 'l') { + lpref = 2; + format++; + } + } + + if (*format == '%') { + if ('%' != *str) + goto done; + str++; + format++; + continue; + } else if (*format == 'd') { + ival = strtoll(str, &endptr, 10); + if (lpref == 0) + *va_arg(args, int *) = ival; + else if (lpref == 1) + *va_arg(args, long *) = ival; + else if (lpref == 2) + *va_arg(args, long long *) = ival; + } else if (*format == 'u' || *format == 'x' || *format == 'X') { + base = *format == 'u' ? 10 : 16; + uval = strtoull(str, &endptr, base); + if (lpref == 0) + *va_arg(args, unsigned int *) = uval; + else if (lpref == 1) + *va_arg(args, unsigned long *) = uval; + else if (lpref == 2) + *va_arg(args, unsigned long long *) = uval; + } else if (*format == 'p') { + *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16); + } else { + SET_ERRNO(EILSEQ); + goto done; + } + + format++; + str = endptr; + matches++; + + } else if (*format == '\0') { + goto done; + } else if (isspace(*format)) { + while (isspace(*format)) + format++; + while (isspace(*str)) + str++; + } else if (*format == *str) { + format++; + str++; + } else { + if (!matches) + matches = EOF; + goto done; + } + } + +done: + return matches; +} + +static __attribute__((unused, format(scanf, 2, 3))) +int sscanf(const char *str, const char *format, ...) 
+{ + va_list args; + int ret; + + va_start(args, format); + ret = vsscanf(str, format, args); + va_end(args); + return ret; +} + static __attribute__((unused)) void perror(const char *msg) { diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 093d0512f4c5..addbceb0b276 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1277,6 +1277,64 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm return ret; } +static int test_scanf(void) +{ + unsigned long long ull; + unsigned long ul; + unsigned int u; + long long ll; + long l; + void *p; + int i; + + if (sscanf("", "foo") != EOF) + return 1; + + if (sscanf("foo", "foo") != 0) + return 2; + + if (sscanf("123", "%d", &i) != 1) + return 3; + + if (i != 123) + return 4; + + if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3) + return 5; + + if (i != 123) + return 6; + + if (u != 456) + return 7; + + if (p != (void *)0x90) + return 8; + + if (sscanf("a b1", "a b%d", &i) != 1) + return 9; + + if (i != 1) + return 10; + + if (sscanf("a%1", "a%%%d", &i) != 1) + return 11; + + if (i != 1) + return 12; + + if (sscanf("1|2|3|4|5|6", + "%d|%ld|%lld|%u|%lu|%llu", + &i, &l, &ll, &u, &ul, &ull) != 6) + return 13; + + if (i != 1 || l != 2 || ll != 3 || + u != 4 || ul != 5 || ull != 6) + return 14; + + return 0; +} + static int run_vfprintf(int min, int max) { int test; @@ -1298,6 +1356,7 @@ static int run_vfprintf(int min, int max) CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; + CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */
The implementation is limited and only supports numeric arguments. Signed-off-by: Thomas Weißschuh <linux@weissschuh.net> --- tools/include/nolibc/stdio.h | 93 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 59 ++++++++++++++++++ 2 files changed, 152 insertions(+)