diff mbox

[next] ocfs2: Reduce object size of mlog uses

Message ID 1429255070.2850.84.camel@perches.com (mailing list archive)
State New, archived
Headers show

Commit Message

Joe Perches April 17, 2015, 7:17 a.m. UTC
Using a function for __mlog_printk instead of a macro
reduces the object size of built-in.o more than 120KB, or
~10% overall (x86-64 defconfig with all ocfs2 options)

$ size fs/ocfs2/built-in.o*
   text	   data	    bss	    dec	    hex	filename
 936255	 118071	 134408	1188734	 12237e	fs/ocfs2/built-in.o.new
1064081	 118071	 134408	1316560	 1416d0	fs/ocfs2/built-in.o.old

Miscellanea:

o Neaten macros around the __mlog_printk uses.
o Use __func__ for __PRETTY_FUNCTION__
o Use ##__VA_ARGS__ for args...

Signed-off-by: Joe Perches <joe@perches.com>
---
Compiled/untested

 fs/ocfs2/cluster/masklog.c | 17 +++++++++++++++++
 fs/ocfs2/cluster/masklog.h | 28 +++++++++++++++++-----------
 2 files changed, 34 insertions(+), 11 deletions(-)

Comments

Andrew Morton April 22, 2015, 10:46 p.m. UTC | #1
On Fri, 17 Apr 2015 00:17:50 -0700 Joe Perches <joe@perches.com> wrote:

> Using a function for __mlog_printk instead of a macro
> reduces the object size of built-in.o more than 120KB, or
> ~10% overall (x86-64 defconfig with all ocfs2 options)
> 
> $ size fs/ocfs2/built-in.o*
>    text	   data	    bss	    dec	    hex	filename
>  936255	 118071	 134408	1188734	 12237e	fs/ocfs2/built-in.o.new
> 1064081	 118071	 134408	1316560	 1416d0	fs/ocfs2/built-in.o.old

It's a start.

> --- a/fs/ocfs2/cluster/masklog.c
> +++ b/fs/ocfs2/cluster/masklog.c
> @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count)
>  	return count;
>  }
>  
> +void __mlog_printk(const char *level, const char *func, int line,
> +		   const char *fmt, ...)
> +{
> +	struct va_format vaf;
> +	va_list args;
> +
> +	va_start(args, fmt);
> +
> +	vaf.fmt = fmt;
> +	vaf.va = &args;
> +
> +	printk("%s(%s,%u,%lu):%s:%d %pV",
> +	       level, current->comm, task_pid_nr(current), __mlog_cpu_guess,
> +	       func, line, &vaf);
> +
> +	va_end(args);
> +}

Logging function-name and line-number was a bit weird.  I wonder if
anyone will mind if this is converted to file-n-line, as God intended. 
That will shrink rodata a bit, because number-of-files is a lot less
than number-of-functions.

>  struct mlog_attribute {
>  	struct attribute attr;
>  	u64 mask;
> diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
> index 7fdc25a..6036e6a 100644
> --- a/fs/ocfs2/cluster/masklog.h
> +++ b/fs/ocfs2/cluster/masklog.h
> @@ -168,7 +168,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
>   * scream.  just do this instead of trying to guess which we're building
>   * against.. *sigh*.
>   */
> -#define __mlog_cpu_guess ({		\
> +#define __mlog_cpu_guess		\
> +({					\

While we're in there we should turn this into __mlog_cpu_guess().

Or, preferably, just zap the sorry thing and use
raw_smp_processor_id().

>  	unsigned long _cpu = get_cpu();	\
>  	put_cpu();			\
>  	_cpu;				\
> @@ -178,21 +179,25 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
>   * before ##args is intentional. Otherwise, gcc 2.95 will eat the
>   * previous token if args expands to nothing.
>   */
> -#define __mlog_printk(level, fmt, args...)				\
> -	printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm,		\
> -	       task_pid_nr(current), __mlog_cpu_guess,			\
> -	       __PRETTY_FUNCTION__, __LINE__ , ##args)
> +__printf(4, 5)
> +void __mlog_printk(const char *level, const char *func, int line,
> +		   const char *fmt, ...);
>  
> -#define mlog(mask, fmt, args...) do {					\
> +#define mlog(mask, fmt, ...)						\
> +do {									\
>  	u64 __m = MLOG_MASK_PREFIX | (mask);				\
>  	if ((__m & ML_ALLOWED_BITS) &&					\
>  	    __mlog_test_u64(__m, mlog_and_bits) &&			\
>  	    !__mlog_test_u64(__m, mlog_not_bits)) {			\
>  		if (__m & ML_ERROR)					\

All this goop can also be uninlined?

> -			__mlog_printk(KERN_ERR, "ERROR: "fmt , ##args);	\
> +			__mlog_printk(KERN_ERR, __func__, __LINE__,	\
> +				      "ERROR: " fmt, ##__VA_ARGS__);	\
>  		else if (__m & ML_NOTICE)				\
> -			__mlog_printk(KERN_NOTICE, fmt , ##args);	\
> -		else __mlog_printk(KERN_INFO, fmt , ##args);		\
> +			__mlog_printk(KERN_NOTICE, __func__, __LINE__,	\
> +				      fmt, ##__VA_ARGS__);		\
> +		else							\
> +			__mlog_printk(KERN_INFO, __func__, __LINE__,	\
> +				      fmt, ##__VA_ARGS__);		\
>  	}								\
>  } while (0)
>  

I guess this patch is a step on the way - a 10% shrink is decent.  But
I believe that with full uninlining of the ocfs2 logging code we can
shrink the filesystem's footprint by 50%.

This code needs some pretty serious rework and rethink, perhaps
involving a change to the emitted info.  I was hoping one of the ocfs2
developers would take the bait, but they're all in hiding.

If you feel like undertaking such a rotorooting then go wild - that should
wake 'em up ;)
Joe Perches April 23, 2015, 2:34 a.m. UTC | #2
On Wed, 2015-04-22 at 15:46 -0700, Andrew Morton wrote:
> On Fri, 17 Apr 2015 00:17:50 -0700 Joe Perches <joe@perches.com> wrote:
> 
> > Using a function for __mlog_printk instead of a macro
> > reduces the object size of built-in.o more than 120KB, or
> > ~10% overall (x86-64 defconfig with all ocfs2 options)
> > 
> > $ size fs/ocfs2/built-in.o*
> >    text	   data	    bss	    dec	    hex	filename
> >  936255	 118071	 134408	1188734	 12237e	fs/ocfs2/built-in.o.new
> > 1064081	 118071	 134408	1316560	 1416d0	fs/ocfs2/built-in.o.old
> 
> It's a start.
> 
> > --- a/fs/ocfs2/cluster/masklog.c
> > +++ b/fs/ocfs2/cluster/masklog.c
> > @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count)
> >  	return count;
> >  }
> >  
> > +void __mlog_printk(const char *level, const char *func, int line,
> > +		   const char *fmt, ...)
> > +{
> > +	struct va_format vaf;
> > +	va_list args;
> > +
> > +	va_start(args, fmt);
> > +
> > +	vaf.fmt = fmt;
> > +	vaf.va = &args;
> > +
> > +	printk("%s(%s,%u,%lu):%s:%d %pV",
> > +	       level, current->comm, task_pid_nr(current), __mlog_cpu_guess,
> > +	       func, line, &vaf);
> > +
> > +	va_end(args);
> > +}
> 
> Logging function-name and line-number was a bit weird.  I wonder if
> anyone will mind if this is converted to file-n-line, as God intended. 
> That will shrink rodata a bit, because number-of-files is a lot less
> than number-of-functions.

I don't care one way or another.

Using __FILE__ vs __func__ reduces built-in.o
by about 25K.

I didn't bother to determine the actual total
reduction in a vmlinux.o

> > -#define mlog(mask, fmt, args...) do {					\
> > +#define mlog(mask, fmt, ...)						\
> > +do {									\
> >  	u64 __m = MLOG_MASK_PREFIX | (mask);				\
> >  	if ((__m & ML_ALLOWED_BITS) &&					\
> >  	    __mlog_test_u64(__m, mlog_and_bits) &&			\
> >  	    !__mlog_test_u64(__m, mlog_not_bits)) {			\
> >  		if (__m & ML_ERROR)					\

> All this goop can also be uninlined?

You have to convert the level pointer to a u64 pointer
passing &__m, but yeah, it's becomes about 65K smaller.

The macro becomes simpler too as the (__m & ML_<LEVEL>)
tests go into the function.

So that's another 7 or 8 % or more total shrinking.

> I guess this patch is a step on the way - a 10% shrink is decent.  But
> I believe that with full uninlining of the ocfs2 logging code we can
> shrink the filesystem's footprint by 50%.

Nope.

Even if CONFIG_PRINTK is not set and the mlog #define
is a no-op, it's not quite that big a reduction.

You have to turn the function tracing code off too for
that 50%.

> If you feel like undertaking such a rotorooting then go wild - that should
> wake 'em up ;)

One step at a time...
Mark Fasheh April 23, 2015, 11:04 p.m. UTC | #3
On Wed, Apr 22, 2015 at 03:46:04PM -0700, Andrew Morton wrote:
> On Fri, 17 Apr 2015 00:17:50 -0700 Joe Perches <joe@perches.com> wrote:
> 
> > Using a function for __mlog_printk instead of a macro
> > reduces the object size of built-in.o more than 120KB, or
> > ~10% overall (x86-64 defconfig with all ocfs2 options)
> > 
> > $ size fs/ocfs2/built-in.o*
> >    text	   data	    bss	    dec	    hex	filename
> >  936255	 118071	 134408	1188734	 12237e	fs/ocfs2/built-in.o.new
> > 1064081	 118071	 134408	1316560	 1416d0	fs/ocfs2/built-in.o.old
> 
> It's a start.
> 
> > --- a/fs/ocfs2/cluster/masklog.c
> > +++ b/fs/ocfs2/cluster/masklog.c
> > @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count)
> >  	return count;
> >  }
> >  
> > +void __mlog_printk(const char *level, const char *func, int line,
> > +		   const char *fmt, ...)
> > +{
> > +	struct va_format vaf;
> > +	va_list args;
> > +
> > +	va_start(args, fmt);
> > +
> > +	vaf.fmt = fmt;
> > +	vaf.va = &args;
> > +
> > +	printk("%s(%s,%u,%lu):%s:%d %pV",
> > +	       level, current->comm, task_pid_nr(current), __mlog_cpu_guess,
> > +	       func, line, &vaf);
> > +
> > +	va_end(args);
> > +}
> 
> Logging function-name and line-number was a bit weird.  I wonder if
> anyone will mind if this is converted to file-n-line, as God intended. 
> That will shrink rodata a bit, because number-of-files is a lot less
> than number-of-functions.

We can live with file-n-line.


> > -			__mlog_printk(KERN_ERR, "ERROR: "fmt , ##args);	\
> > +			__mlog_printk(KERN_ERR, __func__, __LINE__,	\
> > +				      "ERROR: " fmt, ##__VA_ARGS__);	\
> >  		else if (__m & ML_NOTICE)				\
> > -			__mlog_printk(KERN_NOTICE, fmt , ##args);	\
> > -		else __mlog_printk(KERN_INFO, fmt , ##args);		\
> > +			__mlog_printk(KERN_NOTICE, __func__, __LINE__,	\
> > +				      fmt, ##__VA_ARGS__);		\
> > +		else							\
> > +			__mlog_printk(KERN_INFO, __func__, __LINE__,	\
> > +				      fmt, ##__VA_ARGS__);		\
> >  	}								\
> >  } while (0)
> >  
> 
> I guess this patch is a step on the way - a 10% shrink is decent.  But
> I believe that with full uninlining of the ocfs2 logging code we can
> shrink the filesystem's footprint by 50%.
> 
> This code needs some pretty serious rework and rethink, perhaps
> involving a change to the emitted info.  I was hoping one of the ocfs2
> developers would take the bait, but they're all in hiding.

If it functions the same and doesn't have a major performance change, I'm
pretty sure it'll be fine. We sometimes ask customers to enable some of the
debugging if they are having an issue. I would ask that it be tested
on a live system - a local fs, no cluster or cluster config required.


> If you feel like undertaking such a rotorooting then go wild - that should
> wake 'em up ;)

Ok, I've taken the bait  :)
	--Mark

--
Mark Fasheh
Andrew Morton April 23, 2015, 11:19 p.m. UTC | #4
On Thu, 23 Apr 2015 16:04:18 -0700 Mark Fasheh <mfasheh@suse.de> wrote:

> > This code needs some pretty serious rework and rethink, perhaps
> > involving a change to the emitted info.  I was hoping one of the ocfs2
> > developers would take the bait, but they're all in hiding.
> 
> If it functions the same and doesn't have a major performance change, I'm
> pretty sure it'll be fine. We sometimes ask customers to enable some of the
> debugging if they are having an issue. I would ask that it be tested
> on a live system - a local fs, no cluster or cluster config required.

Is there a simpleton's guide to testing ocfs2 on a local disk?  One
which assumes a starting point of "knows how to type".

A few paragraphs in Documentation/filesystems/ocfs2.txt would be great
- then we can point non-ocfs2 people at it when they muck with stuff.
Richard Weinberger April 23, 2015, 11:37 p.m. UTC | #5
On Fri, Apr 24, 2015 at 1:19 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> On Thu, 23 Apr 2015 16:04:18 -0700 Mark Fasheh <mfasheh@suse.de> wrote:
>
>> > This code needs some pretty serious rework and rethink, perhaps
>> > involving a change to the emitted info.  I was hoping one of the ocfs2
>> > developers would take the bait, but they're all in hiding.
>>
>> If it functions the same and doesn't have a major performance change, I'm
>> pretty sure it'll be fine. We sometimes ask customers to enable some of the
>> debugging if they are having an issue. I would ask that it be tested
>> on a live system - a local fs, no cluster or cluster config required.
>
> Is there a simpleton's guide to testing ocfs2 on a local disk?  One
> which assumes a starting point of "knows how to type".

See http://docs.oracle.com/cd/E37670_01/E41138/html/ol_crlcl_ocfs2.html
Mark Fasheh April 24, 2015, 8:31 p.m. UTC | #6
On Fri, Apr 24, 2015 at 01:37:34AM +0200, Richard Weinberger wrote:
> On Fri, Apr 24, 2015 at 1:19 AM, Andrew Morton
> <akpm@linux-foundation.org> wrote:
> > On Thu, 23 Apr 2015 16:04:18 -0700 Mark Fasheh <mfasheh@suse.de> wrote:
> >
> >> > This code needs some pretty serious rework and rethink, perhaps
> >> > involving a change to the emitted info.  I was hoping one of the ocfs2
> >> > developers would take the bait, but they're all in hiding.
> >>
> >> If it functions the same and doesn't have a major performance change, I'm
> >> pretty sure it'll be fine. We sometimes ask customers to enable some of the
> >> debugging if they are having an issue. I would ask that it be tested
> >> on a live system - a local fs, no cluster or cluster config required.
> >
> > Is there a simpleton's guide to testing ocfs2 on a local disk?  One
> > which assumes a starting point of "knows how to type".
> 
> See http://docs.oracle.com/cd/E37670_01/E41138/html/ol_crlcl_ocfs2.html

As Richard points out, it's pretty straightforward, just use the '-M local'
switch to mkfs.ocfs2. From there mount.ocfs2 will do the right thing when
you ask it to load the file system.
	--Mark

--
Mark Fasheh
diff mbox

Patch

diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index af7598b..5b7a351 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -64,6 +64,23 @@  static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count)
 	return count;
 }
 
+void __mlog_printk(const char *level, const char *func, int line,
+		   const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk("%s(%s,%u,%lu):%s:%d %pV",
+	       level, current->comm, task_pid_nr(current), __mlog_cpu_guess,
+	       func, line, &vaf);
+
+	va_end(args);
+}
 struct mlog_attribute {
 	struct attribute attr;
 	u64 mask;
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 7fdc25a..6036e6a 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -168,7 +168,8 @@  extern struct mlog_bits mlog_and_bits, mlog_not_bits;
  * scream.  just do this instead of trying to guess which we're building
  * against.. *sigh*.
  */
-#define __mlog_cpu_guess ({		\
+#define __mlog_cpu_guess		\
+({					\
 	unsigned long _cpu = get_cpu();	\
 	put_cpu();			\
 	_cpu;				\
@@ -178,21 +179,25 @@  extern struct mlog_bits mlog_and_bits, mlog_not_bits;
  * before ##args is intentional. Otherwise, gcc 2.95 will eat the
  * previous token if args expands to nothing.
  */
-#define __mlog_printk(level, fmt, args...)				\
-	printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm,		\
-	       task_pid_nr(current), __mlog_cpu_guess,			\
-	       __PRETTY_FUNCTION__, __LINE__ , ##args)
+__printf(4, 5)
+void __mlog_printk(const char *level, const char *func, int line,
+		   const char *fmt, ...);
 
-#define mlog(mask, fmt, args...) do {					\
+#define mlog(mask, fmt, ...)						\
+do {									\
 	u64 __m = MLOG_MASK_PREFIX | (mask);				\
 	if ((__m & ML_ALLOWED_BITS) &&					\
 	    __mlog_test_u64(__m, mlog_and_bits) &&			\
 	    !__mlog_test_u64(__m, mlog_not_bits)) {			\
 		if (__m & ML_ERROR)					\
-			__mlog_printk(KERN_ERR, "ERROR: "fmt , ##args);	\
+			__mlog_printk(KERN_ERR, __func__, __LINE__,	\
+				      "ERROR: " fmt, ##__VA_ARGS__);	\
 		else if (__m & ML_NOTICE)				\
-			__mlog_printk(KERN_NOTICE, fmt , ##args);	\
-		else __mlog_printk(KERN_INFO, fmt , ##args);		\
+			__mlog_printk(KERN_NOTICE, __func__, __LINE__,	\
+				      fmt, ##__VA_ARGS__);		\
+		else							\
+			__mlog_printk(KERN_INFO, __func__, __LINE__,	\
+				      fmt, ##__VA_ARGS__);		\
 	}								\
 } while (0)
 
@@ -205,10 +210,11 @@  extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 	_st;								\
 })
 
-#define mlog_bug_on_msg(cond, fmt, args...) do {			\
+#define mlog_bug_on_msg(cond, fmt, ...)					\
+do {									\
 	if (cond) {							\
 		mlog(ML_ERROR, "bug expression: " #cond "\n");		\
-		mlog(ML_ERROR, fmt, ##args);				\
+		mlog(ML_ERROR, fmt, ##__VA_ARGS__);			\
 		BUG();							\
 	}								\
 } while (0)