Message ID | 5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@wanadoo.fr (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | seq_file: Optimize seq_puts() | expand |
Le 04/01/2024 à 14:29, Christophe JAILLET a écrit : > Most of seq_puts() usages are done with a string literal. In such cases, > the length of the string car be computed at compile time in order to save > a strlen() call at run-time. seq_write() can then be used instead. > > This saves a few cycles. > > To have an estimation of how often this optimization triggers: > $ git grep seq_puts.*\" | wc -l > 3391 > > Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Hi, any feed-back on this small optimisation of seq_puts()? Most of its usage would be optimized and a strlen() would be saved in all the corresponding cases. $ git grep seq_puts.*\" | wc -l 3436 $ git grep seq_puts | wc -l 3644 CJ > --- > Checked by comparing the output of a few .s files. > Here is one of these outputs: > > $ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w > > call clk_prepare_unlock # > # drivers/clk/clk.c:3320: seq_puts(s, "}\n"); > movq %r12, %rdi # s, > + movl $2, %edx #, > movq $.LC66, %rsi #, > - call seq_puts # > + call seq_write # > call __tsan_func_exit # > # drivers/clk/clk.c:3322: } > xorl %eax, %eax # > @@ -34520,6 +34521,7 @@ > popq %rbp # > popq %r12 # > -- > # drivers/clk/clk.c:3205: seq_puts(s, "-----"); > call __sanitizer_cov_trace_pc # > + movl $5, %edx #, > movq $.LC72, %rsi #, > movq %r13, %rdi # s, > - call seq_puts # > + call seq_write # > jmp .L2134 # > .L2144: > # drivers/clk/clk.c:1793: return clk_core_get_accuracy_no_lock(core); > @@ -35225,20 +35228,23 @@ > leaq 240(%r12), %rdi #, tmp95 > call __tsan_read8 # > -- > movq %r12, %rdi # s, > + movq $.LC77, %rsi #, > # drivers/clk/clk.c:3244: struct hlist_head **lists = s->private; > movq 240(%r12), %rbp # s_9(D)->private, lists > # drivers/clk/clk.c:3246: seq_puts(s, " enable prepare protect duty hardware connection\n"); > - call seq_puts # > + call seq_write # > # drivers/clk/clk.c:3247: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); > + movl $142, %edx #, > movq $.LC78, %rsi #, > movq %r12, %rdi # s, > - call seq_puts # > + call seq_write # > # drivers/clk/clk.c:3248: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); > + movl $142, %edx #, > movq $.LC79, %rsi #, > movq %r12, %rdi # s, > - call seq_puts # > + call seq_write # > # drivers/clk/clk.c:3251: clk_prepare_lock(); > call clk_prepare_lock # > .L2207: > @@ -37511,7 +37517,7 @@ > subq $16, %rsp #, > # drivers/clk/clk.c:3082: { > --- > fs/seq_file.c | 4 ++-- > include/linux/seq_file.h | 10 +++++++++- > 2 files changed, 11 insertions(+), 3 deletions(-) > > diff --git a/fs/seq_file.c b/fs/seq_file.c > index f5fdaf3b1572..8ef0a07033ca 100644 > --- a/fs/seq_file.c > +++ b/fs/seq_file.c > @@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c) > } > EXPORT_SYMBOL(seq_putc); > > -void seq_puts(struct seq_file *m, const char *s) > +void __seq_puts(struct seq_file *m, const char *s) > { > int len = strlen(s); > > @@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s) > memcpy(m->buf + m->count, s, len); > m->count += len; > } > -EXPORT_SYMBOL(seq_puts); > +EXPORT_SYMBOL(__seq_puts); > > /** > * seq_put_decimal_ull_width - A helper routine for putting decimal numbers > diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h > index 234bcdb1fba4..15abf45d62c5 100644 > --- a/include/linux/seq_file.h > +++ b/include/linux/seq_file.h > @@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); > __printf(2, 3) > void seq_printf(struct seq_file *m, const char *fmt, ...); > void seq_putc(struct seq_file *m, char c); > -void seq_puts(struct seq_file *m, const char *s); > +void __seq_puts(struct seq_file *m, const char *s); > +#define seq_puts(m, s) \ > +do { \ > + if (__builtin_constant_p(s)) \ > + seq_write(m, s, __builtin_strlen(s)); \ > + else \ > + __seq_puts(m, s); \ > +} while (0) > + > void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, > unsigned long long num, unsigned int width); > void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
On Mon, Apr 15, 2024 at 10:47:59PM +0200, Christophe JAILLET wrote: > Le 04/01/2024 à 14:29, Christophe JAILLET a écrit : > > Most of seq_puts() usages are done with a string literal. In such cases, > > the length of the string car be computed at compile time in order to save > > a strlen() call at run-time. seq_write() can then be used instead. > > > > This saves a few cycles. > > > > To have an estimation of how often this optimization triggers: > > $ git grep seq_puts.*\" | wc -l > > 3391 > > > > Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> > > Hi, > > any feed-back on this small optimisation of seq_puts()? > > +#define seq_puts(m, s) \ > > +do { \ > > + if (__builtin_constant_p(s)) \ > > + seq_write(m, s, __builtin_strlen(s)); \ > > + else \ > > + __seq_puts(m, s); \ > > +} while (0) > > + No need to make it a macro, actually. And I would suggest going a bit further: static inline void seq_puts(struct seq_file *m, const char *s) { if (!__builtin_constant_p(*s)) __seq_puts(m, s); else if (s[0] && !s[1]) seq_putc(m, s[0]); else seq_write(m, s, __builtin_strlen(s)); }
From: Al Viro > Sent: 15 April 2024 22:01 ... > No need to make it a macro, actually. And I would suggest going > a bit further: > > static inline void seq_puts(struct seq_file *m, const char *s) That probably needs to be 'always_inline'. > { > if (!__builtin_constant_p(*s)) > __seq_puts(m, s); > else if (s[0] && !s[1]) > seq_putc(m, s[0]); > else > seq_write(m, s, __builtin_strlen(s)); > } You missed seq_puts(m, ""); I did wonder about checking sizeof(s) <= 2 in the #define version. That would pick up the cases where a separator is changed/added in a loop. Could you do: size_t len = __builtin_strlen(s); if (!__builtin_constant_p(len)) __seq_puts(m, s); else switch (len){ case 0: break; case 1: seq_putc(m, s[0]); default: seq_write(m, s, len); } David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote: > > static inline void seq_puts(struct seq_file *m, const char *s) > > That probably needs to be 'always_inline'. What for? If compiler fails to inline it (and I'd be very surprised if that happened - if s is not a constant string, we get a straight call of __seq_puts() and for constant strings it boils down to call of seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad would happen; we'd still get correct behaviour. > > { > > if (!__builtin_constant_p(*s)) > > __seq_puts(m, s); > > else if (s[0] && !s[1]) > > seq_putc(m, s[0]); > > else > > seq_write(m, s, __builtin_strlen(s)); > > } > > You missed seq_puts(m, ""); Where have you seen one? And if it gets less than optimal, who cares? > Could you do: > size_t len = __builtin_strlen(s); > if (!__builtin_constant_p(len)) > __seq_puts(m, s); > else switch (len){ > case 0: break; > case 1: seq_putc(m, s[0]); > default: seq_write(m, s, len); > } Umm... That's probably OK, but I wonder how useful would that be...
On 15/04/2024 22.47, Christophe JAILLET wrote: > Le 04/01/2024 à 14:29, Christophe JAILLET a écrit : >> Most of seq_puts() usages are done with a string literal. In such cases, >> the length of the string car be computed at compile time in order to save >> a strlen() call at run-time. seq_write() can then be used instead. >> >> This saves a few cycles. >> >> To have an estimation of how often this optimization triggers: >> $ git grep seq_puts.*\" | wc -l >> 3391 >> >> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> > > Hi, > > any feed-back on this small optimisation of seq_puts()? While you're at it, could you change the implementation of the out-of-line seq_puts (or __seq_puts if it gets renamed to that) to simply be seq_write(seq, s, strlen(s)) instead of duplicating the overflow/memcpy logic. Rasmus
Le 17/04/2024 à 03:04, Al Viro a écrit : > On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote: > >>> static inline void seq_puts(struct seq_file *m, const char *s) >> >> That probably needs to be 'always_inline'. > > What for? If compiler fails to inline it (and I'd be very surprised > if that happened - if s is not a constant string, we get a straight call > of __seq_puts() and for constant strings it boils down to call of > seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad > would happen; we'd still get correct behaviour. > >>> { >>> if (!__builtin_constant_p(*s)) >>> __seq_puts(m, s); >>> else if (s[0] && !s[1]) >>> seq_putc(m, s[0]); >>> else >>> seq_write(m, s, __builtin_strlen(s)); >>> } >> >> You missed seq_puts(m, ""); > > Where have you seen one? Based on results from: git grep seq_puts.*\"\" there is no such cases. > And if it gets less than optimal, who cares? > >> Could you do: >> size_t len = __builtin_strlen(s); >> if (!__builtin_constant_p(len)) >> __seq_puts(m, s); >> else switch (len){ >> case 0: break; >> case 1: seq_putc(m, s[0]); missing break; >> default: seq_write(m, s, len); >> } > > Umm... That's probably OK, but I wonder how useful would that > be... > Thanks all for your feedback. I'll send a v2. CJ >
Le 16/04/2024 à 22:56, David Laight a écrit : > From: Al Viro >> Sent: 15 April 2024 22:01 > ... >> No need to make it a macro, actually. And I would suggest going >> a bit further: >> >> static inline void seq_puts(struct seq_file *m, const char *s) > > That probably needs to be 'always_inline'. > >> { >> if (!__builtin_constant_p(*s)) >> __seq_puts(m, s); >> else if (s[0] && !s[1]) >> seq_putc(m, s[0]); >> else >> seq_write(m, s, __builtin_strlen(s)); >> } > > You missed seq_puts(m, ""); > > I did wonder about checking sizeof(s) <= 2 in the #define version. git grep seq_puts.*\"[^\\].\" | wc -l 77 What would you do in this case? 2 seq_putc() in order to save a memcpy(..., 2), that's it? It would also slightly change the behaviour, as only the 1st char could be added. Actually, it is all or nothing. Don't know what is the best. Any thought? CJ > That would pick up the cases where a separator is changed/added > in a loop. > > Could you do: > size_t len = __builtin_strlen(s); > if (!__builtin_constant_p(len)) > __seq_puts(m, s); > else switch (len){ > case 0: break; > case 1: seq_putc(m, s[0]); > default: seq_write(m, s, len); > } > > David > > - > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK > Registration No: 1397386 (Wales) > > >
On Fri, Apr 19, 2024 at 10:38:15PM +0200, Christophe JAILLET wrote: > Le 16/04/2024 à 22:56, David Laight a écrit : > > From: Al Viro > > > Sent: 15 April 2024 22:01 > > ... > > > No need to make it a macro, actually. And I would suggest going > > > a bit further: > > > > > > static inline void seq_puts(struct seq_file *m, const char *s) > > > > That probably needs to be 'always_inline'. > > > > > { > > > if (!__builtin_constant_p(*s)) > > > __seq_puts(m, s); > > > else if (s[0] && !s[1]) > > > seq_putc(m, s[0]); > > > else > > > seq_write(m, s, __builtin_strlen(s)); > > > } > > > > You missed seq_puts(m, ""); > > > > I did wonder about checking sizeof(s) <= 2 in the #define version. > > git grep seq_puts.*\"[^\\].\" | wc -l > 77 > > What would you do in this case? > 2 seq_putc() in order to save a memcpy(..., 2), that's it? Not a damn thing - just have it call seq_write(). Note that if (s[0] && !s[1]) which triggers only on single-character strings.
From: Christophe JAILLET > Sent: 19 April 2024 21:38 ... > > I did wonder about checking sizeof(s) <= 2 in the #define version. > > git grep seq_puts.*\"[^\\].\" | wc -l > 77 > > What would you do in this case? > 2 seq_putc() in order to save a memcpy(..., 2), that's it? > > It would also slightly change the behaviour, as only the 1st char could > be added. Actually, it is all or nothing. Doing: if (sizeof(str) == 2 && str[0]) seq_putc(m. str[0]); else __seq_puts(m, str); Would pick up loops that do: char sep[2] = ""; for (;; sep[0] = ',') { ... seq_puts(m, sep); ... } as well as seq_puts(m, "x"); Whether that is worthwhile is another matter. But it might be used. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
diff --git a/fs/seq_file.c b/fs/seq_file.c index f5fdaf3b1572..8ef0a07033ca 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c) } EXPORT_SYMBOL(seq_putc); -void seq_puts(struct seq_file *m, const char *s) +void __seq_puts(struct seq_file *m, const char *s) { int len = strlen(s); @@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s) memcpy(m->buf + m->count, s, len); m->count += len; } -EXPORT_SYMBOL(seq_puts); +EXPORT_SYMBOL(__seq_puts); /** * seq_put_decimal_ull_width - A helper routine for putting decimal numbers diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 234bcdb1fba4..15abf45d62c5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); -void seq_puts(struct seq_file *m, const char *s); +void __seq_puts(struct seq_file *m, const char *s); +#define seq_puts(m, s) \ +do { \ + if (__builtin_constant_p(s)) \ + seq_write(m, s, __builtin_strlen(s)); \ + else \ + __seq_puts(m, s); \ +} while (0) + void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
Most of seq_puts() usages are done with a string literal. In such cases, the length of the string car be computed at compile time in order to save a strlen() call at run-time. seq_write() can then be used instead. This saves a few cycles. To have an estimation of how often this optimization triggers: $ git grep seq_puts.*\" | wc -l 3391 Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> --- Checked by comparing the output of a few .s files. Here is one of these outputs: $ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w call clk_prepare_unlock # # drivers/clk/clk.c:3320: seq_puts(s, "}\n"); movq %r12, %rdi # s, + movl $2, %edx #, movq $.LC66, %rsi #, - call seq_puts # + call seq_write # call __tsan_func_exit # # drivers/clk/clk.c:3322: } xorl %eax, %eax # @@ -34520,6 +34521,7 @@ popq %rbp # popq %r12 # -- # drivers/clk/clk.c:3205: seq_puts(s, "-----"); call __sanitizer_cov_trace_pc # + movl $5, %edx #, movq $.LC72, %rsi #, movq %r13, %rdi # s, - call seq_puts # + call seq_write # jmp .L2134 # .L2144: # drivers/clk/clk.c:1793: return clk_core_get_accuracy_no_lock(core); @@ -35225,20 +35228,23 @@ leaq 240(%r12), %rdi #, tmp95 call __tsan_read8 # -- movq %r12, %rdi # s, + movq $.LC77, %rsi #, # drivers/clk/clk.c:3244: struct hlist_head **lists = s->private; movq 240(%r12), %rbp # s_9(D)->private, lists # drivers/clk/clk.c:3246: seq_puts(s, " enable prepare protect duty hardware connection\n"); - call seq_puts # + call seq_write # # drivers/clk/clk.c:3247: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); + movl $142, %edx #, movq $.LC78, %rsi #, movq %r12, %rdi # s, - call seq_puts # + call seq_write # # drivers/clk/clk.c:3248: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); + movl $142, %edx #, movq $.LC79, %rsi #, movq %r12, %rdi # s, - call seq_puts # + call seq_write # # drivers/clk/clk.c:3251: clk_prepare_lock(); call clk_prepare_lock # .L2207: @@ -37511,7 +37517,7 @@ subq $16, %rsp #, # drivers/clk/clk.c:3082: { --- fs/seq_file.c | 4 ++-- include/linux/seq_file.h | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-)