Message ID | 1429255070.2850.84.camel@perches.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, 17 Apr 2015 00:17:50 -0700 Joe Perches <joe@perches.com> wrote: > Using a function for __mlog_printk instead of a macro > reduces the object size of built-in.o more than 120KB, or > ~10% overall (x86-64 defconfig with all ocfs2 options) > > $ size fs/ocfs2/built-in.o* > text data bss dec hex filename > 936255 118071 134408 1188734 12237e fs/ocfs2/built-in.o.new > 1064081 118071 134408 1316560 1416d0 fs/ocfs2/built-in.o.old It's a start. > --- a/fs/ocfs2/cluster/masklog.c > +++ b/fs/ocfs2/cluster/masklog.c > @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count) > return count; > } > > +void __mlog_printk(const char *level, const char *func, int line, > + const char *fmt, ...) > +{ > + struct va_format vaf; > + va_list args; > + > + va_start(args, fmt); > + > + vaf.fmt = fmt; > + vaf.va = &args; > + > + printk("%s(%s,%u,%lu):%s:%d %pV", > + level, current->comm, task_pid_nr(current), __mlog_cpu_guess, > + func, line, &vaf); > + > + va_end(args); > +} Logging function-name and line-number was a bit weird. I wonder if anyone will mind if this is converted to file-n-line, as God intended. That will shrink rodata a bit, because number-of-files is a lot less than number-of-functions. > struct mlog_attribute { > struct attribute attr; > u64 mask; > diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h > index 7fdc25a..6036e6a 100644 > --- a/fs/ocfs2/cluster/masklog.h > +++ b/fs/ocfs2/cluster/masklog.h > @@ -168,7 +168,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; > * scream. just do this instead of trying to guess which we're building > * against.. *sigh*. > */ > -#define __mlog_cpu_guess ({ \ > +#define __mlog_cpu_guess \ > +({ \ While we're in there we should turn this into __mlog_cpu_guess(). Or, preferably, just zap the sorry thing and use raw_smp_processor_id(). > unsigned long _cpu = get_cpu(); \ > put_cpu(); \ > _cpu; \ > @@ -178,21 +179,25 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; > * before ##args is intentional. Otherwise, gcc 2.95 will eat the > * previous token if args expands to nothing. > */ > -#define __mlog_printk(level, fmt, args...) \ > - printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm, \ > - task_pid_nr(current), __mlog_cpu_guess, \ > - __PRETTY_FUNCTION__, __LINE__ , ##args) > +__printf(4, 5) > +void __mlog_printk(const char *level, const char *func, int line, > + const char *fmt, ...); > > -#define mlog(mask, fmt, args...) do { \ > +#define mlog(mask, fmt, ...) \ > +do { \ > u64 __m = MLOG_MASK_PREFIX | (mask); \ > if ((__m & ML_ALLOWED_BITS) && \ > __mlog_test_u64(__m, mlog_and_bits) && \ > !__mlog_test_u64(__m, mlog_not_bits)) { \ > if (__m & ML_ERROR) \ All this goop can also be uninlined? > - __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ > + __mlog_printk(KERN_ERR, __func__, __LINE__, \ > + "ERROR: " fmt, ##__VA_ARGS__); \ > else if (__m & ML_NOTICE) \ > - __mlog_printk(KERN_NOTICE, fmt , ##args); \ > - else __mlog_printk(KERN_INFO, fmt , ##args); \ > + __mlog_printk(KERN_NOTICE, __func__, __LINE__, \ > + fmt, ##__VA_ARGS__); \ > + else \ > + __mlog_printk(KERN_INFO, __func__, __LINE__, \ > + fmt, ##__VA_ARGS__); \ > } \ > } while (0) > I guess this patch is a step on the way - a 10% shrink is decent. But I believe that with full uninlining of the ocfs2 logging code we can shrink the filesystem's footprint by 50%. This code needs some pretty serious rework and rethink, perhaps involving a change to the emitted info. I was hoping one of the ocfs2 developers would take the bait, but they're all in hiding. If you feel like undertaking such a rotorooting then go wild - that should wake 'em up ;)
On Wed, 2015-04-22 at 15:46 -0700, Andrew Morton wrote: > On Fri, 17 Apr 2015 00:17:50 -0700 Joe Perches <joe@perches.com> wrote: > > > Using a function for __mlog_printk instead of a macro > > reduces the object size of built-in.o more than 120KB, or > > ~10% overall (x86-64 defconfig with all ocfs2 options) > > > > $ size fs/ocfs2/built-in.o* > > text data bss dec hex filename > > 936255 118071 134408 1188734 12237e fs/ocfs2/built-in.o.new > > 1064081 118071 134408 1316560 1416d0 fs/ocfs2/built-in.o.old > > It's a start. > > > --- a/fs/ocfs2/cluster/masklog.c > > +++ b/fs/ocfs2/cluster/masklog.c > > @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count) > > return count; > > } > > > > +void __mlog_printk(const char *level, const char *func, int line, > > + const char *fmt, ...) > > +{ > > + struct va_format vaf; > > + va_list args; > > + > > + va_start(args, fmt); > > + > > + vaf.fmt = fmt; > > + vaf.va = &args; > > + > > + printk("%s(%s,%u,%lu):%s:%d %pV", > > + level, current->comm, task_pid_nr(current), __mlog_cpu_guess, > > + func, line, &vaf); > > + > > + va_end(args); > > +} > > Logging function-name and line-number was a bit weird. I wonder if > anyone will mind if this is converted to file-n-line, as God intended. > That will shrink rodata a bit, because number-of-files is a lot less > than number-of-functions. I don't care one way or another. Using __FILE__ vs __func__ reduces built-in.o by about 25K. I didn't bother to determine the actual total reduction in a vmlinux.o > > -#define mlog(mask, fmt, args...) do { \ > > +#define mlog(mask, fmt, ...) \ > > +do { \ > > u64 __m = MLOG_MASK_PREFIX | (mask); \ > > if ((__m & ML_ALLOWED_BITS) && \ > > __mlog_test_u64(__m, mlog_and_bits) && \ > > !__mlog_test_u64(__m, mlog_not_bits)) { \ > > if (__m & ML_ERROR) \ > All this goop can also be uninlined? You have to convert the level pointer to a u64 pointer passing &__m, but yeah, it's becomes about 65K smaller. The macro becomes simpler too as the (__m & ML_<LEVEL>) tests go into the function. So that's another 7 or 8 % or more total shrinking. > I guess this patch is a step on the way - a 10% shrink is decent. But > I believe that with full uninlining of the ocfs2 logging code we can > shrink the filesystem's footprint by 50%. Nope. Even if CONFIG_PRINTK is not set and the mlog #define is a no-op, it's not quite that big a reduction. You have to turn the function tracing code off too for that 50%. > If you feel like undertaking such a rotorooting then go wild - that should > wake 'em up ;) One step at a time...
On Wed, Apr 22, 2015 at 03:46:04PM -0700, Andrew Morton wrote: > On Fri, 17 Apr 2015 00:17:50 -0700 Joe Perches <joe@perches.com> wrote: > > > Using a function for __mlog_printk instead of a macro > > reduces the object size of built-in.o more than 120KB, or > > ~10% overall (x86-64 defconfig with all ocfs2 options) > > > > $ size fs/ocfs2/built-in.o* > > text data bss dec hex filename > > 936255 118071 134408 1188734 12237e fs/ocfs2/built-in.o.new > > 1064081 118071 134408 1316560 1416d0 fs/ocfs2/built-in.o.old > > It's a start. > > > --- a/fs/ocfs2/cluster/masklog.c > > +++ b/fs/ocfs2/cluster/masklog.c > > @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count) > > return count; > > } > > > > +void __mlog_printk(const char *level, const char *func, int line, > > + const char *fmt, ...) > > +{ > > + struct va_format vaf; > > + va_list args; > > + > > + va_start(args, fmt); > > + > > + vaf.fmt = fmt; > > + vaf.va = &args; > > + > > + printk("%s(%s,%u,%lu):%s:%d %pV", > > + level, current->comm, task_pid_nr(current), __mlog_cpu_guess, > > + func, line, &vaf); > > + > > + va_end(args); > > +} > > Logging function-name and line-number was a bit weird. I wonder if > anyone will mind if this is converted to file-n-line, as God intended. > That will shrink rodata a bit, because number-of-files is a lot less > than number-of-functions. We can live with file-n-line. > > - __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ > > + __mlog_printk(KERN_ERR, __func__, __LINE__, \ > > + "ERROR: " fmt, ##__VA_ARGS__); \ > > else if (__m & ML_NOTICE) \ > > - __mlog_printk(KERN_NOTICE, fmt , ##args); \ > > - else __mlog_printk(KERN_INFO, fmt , ##args); \ > > + __mlog_printk(KERN_NOTICE, __func__, __LINE__, \ > > + fmt, ##__VA_ARGS__); \ > > + else \ > > + __mlog_printk(KERN_INFO, __func__, __LINE__, \ > > + fmt, ##__VA_ARGS__); \ > > } \ > > } while (0) > > > > I guess this patch is a step on the way - a 10% shrink is decent. But > I believe that with full uninlining of the ocfs2 logging code we can > shrink the filesystem's footprint by 50%. > > This code needs some pretty serious rework and rethink, perhaps > involving a change to the emitted info. I was hoping one of the ocfs2 > developers would take the bait, but they're all in hiding. If it functions the same and doesn't have a major performance change, I'm pretty sure it'll be fine. We sometimes ask customers to enable some of the debugging if they are having an issue. I would ask that it be tested on a live system - a local fs, no cluster or cluster config required. > If you feel like undertaking such a rotorooting then go wild - that should > wake 'em up ;) Ok, I've taken the bait :) --Mark -- Mark Fasheh
On Thu, 23 Apr 2015 16:04:18 -0700 Mark Fasheh <mfasheh@suse.de> wrote: > > This code needs some pretty serious rework and rethink, perhaps > > involving a change to the emitted info. I was hoping one of the ocfs2 > > developers would take the bait, but they're all in hiding. > > If it functions the same and doesn't have a major performance change, I'm > pretty sure it'll be fine. We sometimes ask customers to enable some of the > debugging if they are having an issue. I would ask that it be tested > on a live system - a local fs, no cluster or cluster config required. Is there a simpleton's guide to testing ocfs2 on a local disk? One which assumes a starting point of "knows how to type". A few paragraphs in Documentation/filesystems/ocfs2.txt would be great - then we can point non-ocfs2 people at it when they muck with stuff.
On Fri, Apr 24, 2015 at 1:19 AM, Andrew Morton <akpm@linux-foundation.org> wrote: > On Thu, 23 Apr 2015 16:04:18 -0700 Mark Fasheh <mfasheh@suse.de> wrote: > >> > This code needs some pretty serious rework and rethink, perhaps >> > involving a change to the emitted info. I was hoping one of the ocfs2 >> > developers would take the bait, but they're all in hiding. >> >> If it functions the same and doesn't have a major performance change, I'm >> pretty sure it'll be fine. We sometimes ask customers to enable some of the >> debugging if they are having an issue. I would ask that it be tested >> on a live system - a local fs, no cluster or cluster config required. > > Is there a simpleton's guide to testing ocfs2 on a local disk? One > which assumes a starting point of "knows how to type". See http://docs.oracle.com/cd/E37670_01/E41138/html/ol_crlcl_ocfs2.html
On Fri, Apr 24, 2015 at 01:37:34AM +0200, Richard Weinberger wrote: > On Fri, Apr 24, 2015 at 1:19 AM, Andrew Morton > <akpm@linux-foundation.org> wrote: > > On Thu, 23 Apr 2015 16:04:18 -0700 Mark Fasheh <mfasheh@suse.de> wrote: > > > >> > This code needs some pretty serious rework and rethink, perhaps > >> > involving a change to the emitted info. I was hoping one of the ocfs2 > >> > developers would take the bait, but they're all in hiding. > >> > >> If it functions the same and doesn't have a major performance change, I'm > >> pretty sure it'll be fine. We sometimes ask customers to enable some of the > >> debugging if they are having an issue. I would ask that it be tested > >> on a live system - a local fs, no cluster or cluster config required. > > > > Is there a simpleton's guide to testing ocfs2 on a local disk? One > > which assumes a starting point of "knows how to type". > > See http://docs.oracle.com/cd/E37670_01/E41138/html/ol_crlcl_ocfs2.html As Richard points out, it's pretty straightforward, just use the '-M local' switch to mkfs.ocfs2. From there mount.ocfs2 will do the right thing when you ask it to load the file system. --Mark -- Mark Fasheh
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index af7598b..5b7a351 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -64,6 +64,23 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count) return count; } +void __mlog_printk(const char *level, const char *func, int line, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk("%s(%s,%u,%lu):%s:%d %pV", + level, current->comm, task_pid_nr(current), __mlog_cpu_guess, + func, line, &vaf); + + va_end(args); +} struct mlog_attribute { struct attribute attr; u64 mask; diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 7fdc25a..6036e6a 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -168,7 +168,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; * scream. just do this instead of trying to guess which we're building * against.. *sigh*. */ -#define __mlog_cpu_guess ({ \ +#define __mlog_cpu_guess \ +({ \ unsigned long _cpu = get_cpu(); \ put_cpu(); \ _cpu; \ @@ -178,21 +179,25 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; * before ##args is intentional. Otherwise, gcc 2.95 will eat the * previous token if args expands to nothing. */ -#define __mlog_printk(level, fmt, args...) \ - printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm, \ - task_pid_nr(current), __mlog_cpu_guess, \ - __PRETTY_FUNCTION__, __LINE__ , ##args) +__printf(4, 5) +void __mlog_printk(const char *level, const char *func, int line, + const char *fmt, ...); -#define mlog(mask, fmt, args...) do { \ +#define mlog(mask, fmt, ...) \ +do { \ u64 __m = MLOG_MASK_PREFIX | (mask); \ if ((__m & ML_ALLOWED_BITS) && \ __mlog_test_u64(__m, mlog_and_bits) && \ !__mlog_test_u64(__m, mlog_not_bits)) { \ if (__m & ML_ERROR) \ - __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ + __mlog_printk(KERN_ERR, __func__, __LINE__, \ + "ERROR: " fmt, ##__VA_ARGS__); \ else if (__m & ML_NOTICE) \ - __mlog_printk(KERN_NOTICE, fmt , ##args); \ - else __mlog_printk(KERN_INFO, fmt , ##args); \ + __mlog_printk(KERN_NOTICE, __func__, __LINE__, \ + fmt, ##__VA_ARGS__); \ + else \ + __mlog_printk(KERN_INFO, __func__, __LINE__, \ + fmt, ##__VA_ARGS__); \ } \ } while (0) @@ -205,10 +210,11 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; _st; \ }) -#define mlog_bug_on_msg(cond, fmt, args...) do { \ +#define mlog_bug_on_msg(cond, fmt, ...) \ +do { \ if (cond) { \ mlog(ML_ERROR, "bug expression: " #cond "\n"); \ - mlog(ML_ERROR, fmt, ##args); \ + mlog(ML_ERROR, fmt, ##__VA_ARGS__); \ BUG(); \ } \ } while (0)
Using a function for __mlog_printk instead of a macro reduces the object size of built-in.o more than 120KB, or ~10% overall (x86-64 defconfig with all ocfs2 options) $ size fs/ocfs2/built-in.o* text data bss dec hex filename 936255 118071 134408 1188734 12237e fs/ocfs2/built-in.o.new 1064081 118071 134408 1316560 1416d0 fs/ocfs2/built-in.o.old Miscellanea: o Neaten macros around the __mlog_printk uses. o Use __func__ for __PRETTY_FUNCTION__ o Use ##__VA_ARGS__ for args... Signed-off-by: Joe Perches <joe@perches.com> --- Compiled/untested fs/ocfs2/cluster/masklog.c | 17 +++++++++++++++++ fs/ocfs2/cluster/masklog.h | 28 +++++++++++++++++----------- 2 files changed, 34 insertions(+), 11 deletions(-)