diff mbox series

seq_file: Optimize seq_puts()

Message ID 5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@wanadoo.fr (mailing list archive)
State New, archived
Headers show
Series seq_file: Optimize seq_puts() | expand

Commit Message

Christophe JAILLET Jan. 4, 2024, 1:29 p.m. UTC
Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_write() can then be used instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
   $ git grep seq_puts.*\" | wc -l
   3391

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

 	call	clk_prepare_unlock	#
 # drivers/clk/clk.c:3320: 	seq_puts(s, "}\n");
 	movq	%r12, %rdi	# s,
+	movl	$2, %edx	#,
 	movq	$.LC66, %rsi	#,
-	call	seq_puts	#
+	call	seq_write	#
 	call	__tsan_func_exit	#
 # drivers/clk/clk.c:3322: }
 	xorl	%eax, %eax	#
@@ -34520,6 +34521,7 @@
 	popq	%rbp	#
 	popq	%r12	#
--
 # drivers/clk/clk.c:3205: 		seq_puts(s, "-----");
 	call	__sanitizer_cov_trace_pc	#
+	movl	$5, %edx	#,
 	movq	$.LC72, %rsi	#,
 	movq	%r13, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
 	jmp	.L2134	#
 .L2144:
 # drivers/clk/clk.c:1793: 	return clk_core_get_accuracy_no_lock(core);
@@ -35225,20 +35228,23 @@
 	leaq	240(%r12), %rdi	#, tmp95
 	call	__tsan_read8	#
--
 	movq	%r12, %rdi	# s,
+	movq	$.LC77, %rsi	#,
 # drivers/clk/clk.c:3244: 	struct hlist_head **lists = s->private;
 	movq	240(%r12), %rbp	# s_9(D)->private, lists
 # drivers/clk/clk.c:3246: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	call	seq_puts	#
+	call	seq_write	#
 # drivers/clk/clk.c:3247: 	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle    enable   consumer                         id\n");
+	movl	$142, %edx	#,
 	movq	$.LC78, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
 # drivers/clk/clk.c:3248: 	seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+	movl	$142, %edx	#,
 	movq	$.LC79, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
 # drivers/clk/clk.c:3251: 	clk_prepare_lock();
 	call	clk_prepare_lock	#
 .L2207:
@@ -37511,7 +37517,7 @@
 	subq	$16, %rsp	#,
 # drivers/clk/clk.c:3082: {
---
 fs/seq_file.c            |  4 ++--
 include/linux/seq_file.h | 10 +++++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

Comments

Christophe JAILLET April 15, 2024, 8:47 p.m. UTC | #1
Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_write() can then be used instead.
> 
> This saves a few cycles.
> 
> To have an estimation of how often this optimization triggers:
>     $ git grep seq_puts.*\" | wc -l
>     3391
> 
> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>

Hi,

any feed-back on this small optimisation of seq_puts()?

Most of its usage would be optimized and a strlen() would be saved in 
all the corresponding cases.


$ git grep seq_puts.*\" | wc -l
3436

$ git grep seq_puts | wc -l
3644

CJ

> ---
> Checked by comparing the output of a few .s files.
> Here is one of these outputs:
> 
> $ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w
> 
>   	call	clk_prepare_unlock	#
>   # drivers/clk/clk.c:3320: 	seq_puts(s, "}\n");
>   	movq	%r12, %rdi	# s,
> +	movl	$2, %edx	#,
>   	movq	$.LC66, %rsi	#,
> -	call	seq_puts	#
> +	call	seq_write	#
>   	call	__tsan_func_exit	#
>   # drivers/clk/clk.c:3322: }
>   	xorl	%eax, %eax	#
> @@ -34520,6 +34521,7 @@
>   	popq	%rbp	#
>   	popq	%r12	#
> --
>   # drivers/clk/clk.c:3205: 		seq_puts(s, "-----");
>   	call	__sanitizer_cov_trace_pc	#
> +	movl	$5, %edx	#,
>   	movq	$.LC72, %rsi	#,
>   	movq	%r13, %rdi	# s,
> -	call	seq_puts	#
> +	call	seq_write	#
>   	jmp	.L2134	#
>   .L2144:
>   # drivers/clk/clk.c:1793: 	return clk_core_get_accuracy_no_lock(core);
> @@ -35225,20 +35228,23 @@
>   	leaq	240(%r12), %rdi	#, tmp95
>   	call	__tsan_read8	#
> --
>   	movq	%r12, %rdi	# s,
> +	movq	$.LC77, %rsi	#,
>   # drivers/clk/clk.c:3244: 	struct hlist_head **lists = s->private;
>   	movq	240(%r12), %rbp	# s_9(D)->private, lists
>   # drivers/clk/clk.c:3246: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
> -	call	seq_puts	#
> +	call	seq_write	#
>   # drivers/clk/clk.c:3247: 	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle    enable   consumer                         id\n");
> +	movl	$142, %edx	#,
>   	movq	$.LC78, %rsi	#,
>   	movq	%r12, %rdi	# s,
> -	call	seq_puts	#
> +	call	seq_write	#
>   # drivers/clk/clk.c:3248: 	seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
> +	movl	$142, %edx	#,
>   	movq	$.LC79, %rsi	#,
>   	movq	%r12, %rdi	# s,
> -	call	seq_puts	#
> +	call	seq_write	#
>   # drivers/clk/clk.c:3251: 	clk_prepare_lock();
>   	call	clk_prepare_lock	#
>   .L2207:
> @@ -37511,7 +37517,7 @@
>   	subq	$16, %rsp	#,
>   # drivers/clk/clk.c:3082: {
> ---
>   fs/seq_file.c            |  4 ++--
>   include/linux/seq_file.h | 10 +++++++++-
>   2 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index f5fdaf3b1572..8ef0a07033ca 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
>   }
>   EXPORT_SYMBOL(seq_putc);
>   
> -void seq_puts(struct seq_file *m, const char *s)
> +void __seq_puts(struct seq_file *m, const char *s)
>   {
>   	int len = strlen(s);
>   
> @@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
>   	memcpy(m->buf + m->count, s, len);
>   	m->count += len;
>   }
> -EXPORT_SYMBOL(seq_puts);
> +EXPORT_SYMBOL(__seq_puts);
>   
>   /**
>    * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
> diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
> index 234bcdb1fba4..15abf45d62c5 100644
> --- a/include/linux/seq_file.h
> +++ b/include/linux/seq_file.h
> @@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
>   __printf(2, 3)
>   void seq_printf(struct seq_file *m, const char *fmt, ...);
>   void seq_putc(struct seq_file *m, char c);
> -void seq_puts(struct seq_file *m, const char *s);
> +void __seq_puts(struct seq_file *m, const char *s);
> +#define seq_puts(m, s)						\
> +do {								\
> +	if (__builtin_constant_p(s))				\
> +		seq_write(m, s, __builtin_strlen(s));		\
> +	else							\
> +		__seq_puts(m, s);				\
> +} while (0)
> +
>   void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
>   			       unsigned long long num, unsigned int width);
>   void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
Al Viro April 15, 2024, 9 p.m. UTC | #2
On Mon, Apr 15, 2024 at 10:47:59PM +0200, Christophe JAILLET wrote:
> Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
> > Most of seq_puts() usages are done with a string literal. In such cases,
> > the length of the string car be computed at compile time in order to save
> > a strlen() call at run-time. seq_write() can then be used instead.
> > 
> > This saves a few cycles.
> > 
> > To have an estimation of how often this optimization triggers:
> >     $ git grep seq_puts.*\" | wc -l
> >     3391
> > 
> > Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> 
> Hi,
> 
> any feed-back on this small optimisation of seq_puts()?

> > +#define seq_puts(m, s)						\
> > +do {								\
> > +	if (__builtin_constant_p(s))				\
> > +		seq_write(m, s, __builtin_strlen(s));		\
> > +	else							\
> > +		__seq_puts(m, s);				\
> > +} while (0)
> > +

No need to make it a macro, actually.  And I would suggest going
a bit further:

static inline void seq_puts(struct seq_file *m, const char *s)
{
        if (!__builtin_constant_p(*s))
		__seq_puts(m, s);
	else if (s[0] && !s[1])
		seq_putc(m, s[0]);
	else
		seq_write(m, s, __builtin_strlen(s));
}
David Laight April 16, 2024, 8:56 p.m. UTC | #3
From: Al Viro
> Sent: 15 April 2024 22:01
...
> No need to make it a macro, actually.  And I would suggest going
> a bit further:
> 
> static inline void seq_puts(struct seq_file *m, const char *s)

That probably needs to be 'always_inline'.

> {
>	if (!__builtin_constant_p(*s))
> 		__seq_puts(m, s);
> 	else if (s[0] && !s[1])
> 		seq_putc(m, s[0]);
> 	else
> 		seq_write(m, s, __builtin_strlen(s));
> }

You missed seq_puts(m, "");

I did wonder about checking sizeof(s) <= 2 in the #define version.
That would pick up the cases where a separator is changed/added
in a loop.

Could you do:
	size_t len = __builtin_strlen(s);
	if (!__builtin_constant_p(len))
		__seq_puts(m, s);
	else switch (len){
	case 0: break;
	case 1: seq_putc(m, s[0]);
	default: seq_write(m, s, len);
	}

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Al Viro April 17, 2024, 1:04 a.m. UTC | #4
On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote:

> > static inline void seq_puts(struct seq_file *m, const char *s)
> 
> That probably needs to be 'always_inline'.

What for?  If compiler fails to inline it (and I'd be very surprised
if that happened - if s is not a constant string, we get a straight call
of __seq_puts() and for constant strings it boils down to call of
seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad
would happen; we'd still get correct behaviour.

> > {
> >	if (!__builtin_constant_p(*s))
> > 		__seq_puts(m, s);
> > 	else if (s[0] && !s[1])
> > 		seq_putc(m, s[0]);
> > 	else
> > 		seq_write(m, s, __builtin_strlen(s));
> > }
> 
> You missed seq_puts(m, "");

Where have you seen one?  And if it gets less than optimal, who cares?

> Could you do:
> 	size_t len = __builtin_strlen(s);
> 	if (!__builtin_constant_p(len))
> 		__seq_puts(m, s);
> 	else switch (len){
> 	case 0: break;
> 	case 1: seq_putc(m, s[0]);
> 	default: seq_write(m, s, len);
> 	}

Umm...  That's probably OK, but I wonder how useful would that
be...
Rasmus Villemoes April 17, 2024, 7:53 a.m. UTC | #5
On 15/04/2024 22.47, Christophe JAILLET wrote:
> Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
>> Most of seq_puts() usages are done with a string literal. In such cases,
>> the length of the string car be computed at compile time in order to save
>> a strlen() call at run-time. seq_write() can then be used instead.
>>
>> This saves a few cycles.
>>
>> To have an estimation of how often this optimization triggers:
>>     $ git grep seq_puts.*\" | wc -l
>>     3391
>>
>> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> 
> Hi,
> 
> any feed-back on this small optimisation of seq_puts()?

While you're at it, could you change the implementation of the
out-of-line seq_puts (or __seq_puts if it gets renamed to that) to
simply be seq_write(seq, s, strlen(s)) instead of duplicating the
overflow/memcpy logic.

Rasmus
Christophe JAILLET April 19, 2024, 6:59 p.m. UTC | #6
Le 17/04/2024 à 03:04, Al Viro a écrit :
> On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote:
> 
>>> static inline void seq_puts(struct seq_file *m, const char *s)
>>
>> That probably needs to be 'always_inline'.
> 
> What for?  If compiler fails to inline it (and I'd be very surprised
> if that happened - if s is not a constant string, we get a straight call
> of __seq_puts() and for constant strings it boils down to call of
> seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad
> would happen; we'd still get correct behaviour.
> 
>>> {
>>> 	if (!__builtin_constant_p(*s))
>>> 		__seq_puts(m, s);
>>> 	else if (s[0] && !s[1])
>>> 		seq_putc(m, s[0]);
>>> 	else
>>> 		seq_write(m, s, __builtin_strlen(s));
>>> }
>>
>> You missed seq_puts(m, "");
> 
> Where have you seen one?

Based on results from:
    git grep seq_puts.*\"\"

there is no such cases.


>  And if it gets less than optimal, who cares?
> 
>> Could you do:
>> 	size_t len = __builtin_strlen(s);
>> 	if (!__builtin_constant_p(len))
>> 		__seq_puts(m, s);
>> 	else switch (len){
>> 	case 0: break;
>> 	case 1: seq_putc(m, s[0]);

missing break;

>> 	default: seq_write(m, s, len);
>> 	}
> 
> Umm...  That's probably OK, but I wonder how useful would that
> be...
> 

Thanks all for your feedback.

I'll send a v2.

CJ

>
Christophe JAILLET April 19, 2024, 8:38 p.m. UTC | #7
Le 16/04/2024 à 22:56, David Laight a écrit :
> From: Al Viro
>> Sent: 15 April 2024 22:01
> ...
>> No need to make it a macro, actually.  And I would suggest going
>> a bit further:
>>
>> static inline void seq_puts(struct seq_file *m, const char *s)
> 
> That probably needs to be 'always_inline'.
> 
>> {
>> 	if (!__builtin_constant_p(*s))
>> 		__seq_puts(m, s);
>> 	else if (s[0] && !s[1])
>> 		seq_putc(m, s[0]);
>> 	else
>> 		seq_write(m, s, __builtin_strlen(s));
>> }
> 
> You missed seq_puts(m, "");
> 
> I did wonder about checking sizeof(s) <= 2 in the #define version.

git grep seq_puts.*\"[^\\].\" | wc -l
77

What would you do in this case?
2 seq_putc() in order to save a memcpy(..., 2), that's it?

It would also slightly change the behaviour, as only the 1st char could 
be added. Actually, it is all or nothing.

Don't know what is the best.

Any thought?

CJ


> That would pick up the cases where a separator is changed/added
> in a loop.
> 
> Could you do:
> 	size_t len = __builtin_strlen(s);
> 	if (!__builtin_constant_p(len))
> 		__seq_puts(m, s);
> 	else switch (len){
> 	case 0: break;
> 	case 1: seq_putc(m, s[0]);
> 	default: seq_write(m, s, len);
> 	}
> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 
> 
>
Al Viro April 19, 2024, 9:32 p.m. UTC | #8
On Fri, Apr 19, 2024 at 10:38:15PM +0200, Christophe JAILLET wrote:
> Le 16/04/2024 à 22:56, David Laight a écrit :
> > From: Al Viro
> > > Sent: 15 April 2024 22:01
> > ...
> > > No need to make it a macro, actually.  And I would suggest going
> > > a bit further:
> > > 
> > > static inline void seq_puts(struct seq_file *m, const char *s)
> > 
> > That probably needs to be 'always_inline'.
> > 
> > > {
> > > 	if (!__builtin_constant_p(*s))
> > > 		__seq_puts(m, s);
> > > 	else if (s[0] && !s[1])
> > > 		seq_putc(m, s[0]);
> > > 	else
> > > 		seq_write(m, s, __builtin_strlen(s));
> > > }
> > 
> > You missed seq_puts(m, "");
> > 
> > I did wonder about checking sizeof(s) <= 2 in the #define version.
> 
> git grep seq_puts.*\"[^\\].\" | wc -l
> 77
> 
> What would you do in this case?
> 2 seq_putc() in order to save a memcpy(..., 2), that's it?

Not a damn thing - just have it call seq_write().  Note that
	if (s[0] && !s[1])
which triggers only on single-character strings.
David Laight April 21, 2024, 5:21 p.m. UTC | #9
From: Christophe JAILLET
> Sent: 19 April 2024 21:38
...
> > I did wonder about checking sizeof(s) <= 2 in the #define version.
> 
> git grep seq_puts.*\"[^\\].\" | wc -l
> 77
> 
> What would you do in this case?
> 2 seq_putc() in order to save a memcpy(..., 2), that's it?
> 
> It would also slightly change the behaviour, as only the 1st char could
> be added. Actually, it is all or nothing.

Doing:
	if (sizeof(str) == 2 && str[0])
		seq_putc(m. str[0]);
	else
		__seq_puts(m, str);
Would pick up loops that do:
	char sep[2] = "";

	for (;; sep[0] = ',') {
		...
		seq_puts(m, sep);
		...
	}
as well as seq_puts(m, "x");

Whether that is worthwhile is another matter.
But it might be used.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@  void seq_putc(struct seq_file *m, char c)
 }
 EXPORT_SYMBOL(seq_putc);
 
-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
 {
 	int len = strlen(s);
 
@@ -680,7 +680,7 @@  void seq_puts(struct seq_file *m, const char *s)
 	memcpy(m->buf + m->count, s, len);
 	m->count += len;
 }
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);
 
 /**
  * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..15abf45d62c5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,15 @@  void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
 __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+#define seq_puts(m, s)						\
+do {								\
+	if (__builtin_constant_p(s))				\
+		seq_write(m, s, __builtin_strlen(s));		\
+	else							\
+		__seq_puts(m, s);				\
+} while (0)
+
 void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
 			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,