Message ID | 20130219203115.114eab79e8d2099c6306d921@freescale.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 2/19/2013 6:31 PM, Kim Phillips wrote: > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile > index af72969..dbee639 100644 > --- a/arch/arm/lib/Makefile > +++ b/arch/arm/lib/Makefile > @@ -45,3 +45,5 @@ lib-$(CONFIG_ARCH_SHARK) += io-shark.o > > $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S > $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S > + > +CFLAGS_bswapsdi2.o = -O2 Please put a comment here so we don't have to dig through git history and/or emails to understand why this has -O2 forced on for this file.
On Tue, 19 Feb 2013, Kim Phillips wrote: > On Fri, 8 Feb 2013 22:16:47 -0500 > Nicolas Pitre <nico@fluxnic.net> wrote: > > > Not only that, but in many cases the results are wildly different given > > the same config: > > > > > imx_v6_v7_defconfig: 7637605 7636935 -670 > > > lart_defconfig: 2922550 2926600 4050 > > > mxs_defconfig: 11071139 11070893 -246 > > > > The mxs_defconfig became much better while lart_defconfig regressed a > > lot. > > > > > Haven't looked at why. > > > > Would be a good idea since this is rather weird and gcc could benefit > > from your findings. > > The following is next-20130207 built with Linaro gcc 4.7.1 [1], and > before and after the diff at the bottom of this email (and with > normalized linux version string sizes): > > lart_defconfig: 2752106 120864 56444 2929414 2cb306 vmlinux > lart_defconfig: 2756092 120864 56444 2933400 2cc298 vmlinux #builtin-bswap > > mxs_defconfig: 5229115 280572 5569648 11079335 a90ea7 vmlinux > mxs_defconfig: 5228969 280552 5569648 11079169 a90e01 vmlinux #builtin-bswap > > imx_v6_v7_defconfig: 6935025 356172 360648 7651845 74c205 vmlinux > imx_v6_v7_defconfig: 6934091 356180 360648 7650919 74be67 vmlinux #builtin-bswap > > > so builtin-bswap improved mxs and imx_v6_v7 but in lart, it _added_ > 3986 bytes to .text -> not good. > > Getting a closer look at lart, bloat-o-meter says the code actually > shrunk: > > add/remove: 7/1 grow/shrink: 11/19 up/down: 298/-356 (-58) > function old new delta > inet_abc_len - 96 +96 > __bswapdi2 - 52 +52 > __bswapsi2 - 32 +32 > icmp_unreach 472 492 +20 > xfrm_selector_match 988 1000 +12 > fib_table_insert 2176 2188 +12 > __kstrtab___bswapsi2 - 11 +11 > __kstrtab___bswapdi2 - 11 +11 > __ksymtab___bswapsi2 - 8 +8 > __ksymtab___bswapdi2 - 8 +8 > vermagic 51 57 +6 > linux_banner 230 236 +6 > xfrm_replay_check_esn 320 324 +4 > xfrm_replay_check_bmp 200 204 +4 > xfrm_replay_check 152 156 +4 > static.tcp_parse_aligned_timestamp 80 84 +4 > fib_table_delete 708 712 +4 > cookie_v4_check 1316 1320 +4 > tcp_tso_segment 728 724 -4 > tcp_options_write 724 720 -4 > ip_rt_ioctl 1152 1148 -4 > fib_trie_seq_show 724 720 -4 > crc32_be 448 444 -4 > xfrm_stateonly_find 640 632 -8 > tcp_finish_connect 276 268 -8 > static.tcp_v4_send_ack 480 472 -8 > __xfrm_state_lookup 356 348 -8 > __xfrm_state_bump_genids 436 428 -8 > __find_acq_core 1256 1248 -8 > cookie_v4_init_sequence 272 260 -12 > __xfrm_state_insert 616 600 -16 > sys_swapon 2500 2480 -20 > xfrm_state_find 2420 2396 -24 > xfrm_hash_resize 1620 1596 -24 > fib_route_seq_show 560 536 -24 > fib_table_dump 704 676 -28 > devinet_ioctl 1856 1796 -60 > static.inet_abc_len 80 - -80 > > Comparing System.maps, .rodata starts at the same address: > > c020a000 R __start_rodata > c020a000 R __start_rodata #builtin-bswap > > however, changes including the __bswap[sd]i2 implementation pushes > the .rodata section size just over the 4KiB alignment boundary > specified in arm/kernel/vmlinux.lds: > > no builtin_bswap: > > c028ffc4 R __stop___modver > c0290000 R __end_rodata > c0290000 R __start___ex_table > > with builtin_bswap: > > c0290068 R __stop___modver > c0291000 R __end_rodata > c0291000 R __start___ex_table > > So, AFAICT, that's why we see a total increase in .text for lart, > and, looking at both numbers being a little less than 4KiB, I > suspect the same with whatever happened with mxs above. OK. At least we do have a plausible explanation now. The actual code being smaller should compensate for section alignment loss. > ok, so to avoid recursion, I've enforced a -O2 on bswapsdi2.o. Not only recursion, but the horrible assembly output from -Os. > Here's the new diff: > > changes from last diff: > - enforce -O2 for bswapsdi2.o > - fix building out-of-source tree > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig > index 4265a26..5e8b735 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -58,6 +58,7 @@ config ARM > select CLONE_BACKWARDS > select OLD_SIGSUSPEND3 > select OLD_SIGACTION > + select ARCH_USE_BUILTIN_BSWAP > help > The ARM series is a line of low-power-consumption RISC chip designs > licensed by ARM Ltd and targeted at embedded applications and > diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile > index c9865f6..8ef97c4 100644 > --- a/arch/arm/boot/compressed/Makefile > +++ b/arch/arm/boot/compressed/Makefile > @@ -111,12 +111,12 @@ endif > > targets := vmlinux vmlinux.lds \ > piggy.$(suffix_y) piggy.$(suffix_y).o \ > - lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \ > + lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \ > font.o font.c head.o misc.o $(OBJS) > > # Make sure files are removed during clean > extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \ > - lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs) > + lib1funcs.S ashldi3.S bswapsdi2.o $(libfdt) $(libfdt_hdrs) > > ifeq ($(CONFIG_FUNCTION_TRACER),y) > ORIG_CFLAGS := $(KBUILD_CFLAGS) > @@ -158,6 +158,12 @@ ashldi3 = $(obj)/ashldi3.o > $(obj)/ashldi3.S: $(srctree)/arch/$(SRCARCH)/lib/ashldi3.S > $(call cmd,shipped) > > +# For __bswapsi2, __bswapdi2 > +bswapsdi2 = $(obj)/bswapsdi2.o > + > +$(obj)/bswapsdi2.o: $(obj)/../../../../arch/$(SRCARCH)/lib/bswapsdi2.o > + $(call cmd,shipped) > + I don't think you can get away with this. The decompressor code is compiled with -fpic and the main kernel is not. Most toolchains do mark object files with some flags to prevent the link of incompatible objects together (normally pic and non pic objects are not compatible even if in this very simple case that would not matter). Maybe you are able to link zImage successfully simply because no references to __bswap* needed to be resolved and therefore the linker didn't need to search/include that object? > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile > index af72969..dbee639 100644 > --- a/arch/arm/lib/Makefile > +++ b/arch/arm/lib/Makefile > @@ -13,7 +13,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ > ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ > ucmpdi2.o lib1funcs.o div64.o \ > io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ > - call_with_stack.o > + call_with_stack.o bswapsdi2.o > > mmu-y := clear_user.o copy_page.o getuser.o putuser.o > > @@ -45,3 +45,5 @@ lib-$(CONFIG_ARCH_SHARK) += io-shark.o > > $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S > $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S > + > +CFLAGS_bswapsdi2.o = -O2 Please insert a small comment to explain why this is done. Adding some more elaborate explanation in the commit log would be good too. Nicolas
On Tue, 2013-02-19 at 22:17 -0500, Nicolas Pitre wrote: > > > +$(obj)/bswapsdi2.o: $(obj)/../../../../arch/$(SRCARCH)/lib/bswapsdi2.o > > + $(call cmd,shipped) > > + > > I don't think you can get away with this. The decompressor code is > compiled with -fpic and the main kernel is not. Most toolchains do mark > object files with some flags to prevent the link of incompatible objects > together (normally pic and non pic objects are not compatible even if in > this very simple case that would not matter). Maybe you are able to > link zImage successfully simply because no references to __bswap* needed > to be resolved and therefore the linker didn't need to search/include > that object? This, and the issue with -Os vs. -O2, make me inclined to suggest that we should just provide an asm version of these functions instead of using the compiler.
On Wed, 20 Feb 2013, Woodhouse, David wrote: > On Tue, 2013-02-19 at 22:17 -0500, Nicolas Pitre wrote: > > > > > +$(obj)/bswapsdi2.o: $(obj)/../../../../arch/$(SRCARCH)/lib/bswapsdi2.o > > > + $(call cmd,shipped) > > > + > > > > I don't think you can get away with this. The decompressor code is > > compiled with -fpic and the main kernel is not. Most toolchains do mark > > object files with some flags to prevent the link of incompatible objects > > together (normally pic and non pic objects are not compatible even if in > > this very simple case that would not matter). Maybe you are able to > > link zImage successfully simply because no references to __bswap* needed > > to be resolved and therefore the linker didn't need to search/include > > that object? > > This, and the issue with -Os vs. -O2, make me inclined to suggest that > we should just provide an asm version of these functions instead of > using the compiler. You'll have the same issue wrt the above whether or not the source file is C or assembly. Nicolas
On Wed, 2013-02-20 at 08:36 -0500, Nicolas Pitre wrote: > You'll have the same issue wrt the above whether or not the source > file is C or assembly. Hm, true. I was thinking of the code itself (which is position-independent anyway), rather than the flags in the object file. So just ship a .S file and for the decompressor (if we need it at all) rebuild it just the same as we do the *other* libgcc code like ashldi3.S etc.
On Wed, 20 Feb 2013, Woodhouse, David wrote: > On Wed, 2013-02-20 at 08:36 -0500, Nicolas Pitre wrote: > > You'll have the same issue wrt the above whether or not the source > > file is C or assembly. > > Hm, true. I was thinking of the code itself (which is > position-independent anyway), rather than the flags in the object file. > > So just ship a .S file and for the decompressor (if we need it at all) > rebuild it just the same as we do the *other* libgcc code like ashldi3.S > etc. ... in which case there is no harm shipping a .c file and trivially enforcing -O2, the rest being equal. Nicolas
On Wed, 2013-02-20 at 09:06 -0500, Nicolas Pitre wrote: > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > On Wed, 2013-02-20 at 08:36 -0500, Nicolas Pitre wrote: > > > You'll have the same issue wrt the above whether or not the source > > > file is C or assembly. > > > > Hm, true. I was thinking of the code itself (which is > > position-independent anyway), rather than the flags in the object file. > > > > So just ship a .S file and for the decompressor (if we need it at all) > > rebuild it just the same as we do the *other* libgcc code like ashldi3.S > > etc. > > ... in which case there is no harm shipping a .c file and trivially > enforcing -O2, the rest being equal. For today's compilers, unless the wind changes.
On Wed, 20 Feb 2013, Woodhouse, David wrote: > On Wed, 2013-02-20 at 09:06 -0500, Nicolas Pitre wrote: > > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > > > On Wed, 2013-02-20 at 08:36 -0500, Nicolas Pitre wrote: > > > > You'll have the same issue wrt the above whether or not the source > > > > file is C or assembly. > > > > > > Hm, true. I was thinking of the code itself (which is > > > position-independent anyway), rather than the flags in the object file. > > > > > > So just ship a .S file and for the decompressor (if we need it at all) > > > rebuild it just the same as we do the *other* libgcc code like ashldi3.S > > > etc. > > > > ... in which case there is no harm shipping a .c file and trivially > > enforcing -O2, the rest being equal. > > For today's compilers, unless the wind changes. We'll adapt if necessary. Going with -O2 should remain pretty safe anyway. Nicolas
On Wed, 20 Feb 2013 10:43:18 -0500 Nicolas Pitre <nico@fluxnic.net> wrote: > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > On Wed, 2013-02-20 at 09:06 -0500, Nicolas Pitre wrote: > > > ... in which case there is no harm shipping a .c file and trivially > > > enforcing -O2, the rest being equal. > > > > For today's compilers, unless the wind changes. > > We'll adapt if necessary. Going with -O2 should remain pretty safe anyway. Alas, not so for gcc 4.4 - I had forgotten I had tested Ubuntu/Linaro 4.4.7-1ubuntu2 here: https://patchwork.kernel.org/patch/2101491/ add -O2 to that test script and gcc 4.4 *always* emits calls to __bswap[sd]i2, even with -march=armv6k+. I'll try working on an assembly version given it probably makes more sense, future-gcc-immunity-wise. Otherwise we're back to the old 'if GCC_VERSION >= 40500' in arch/arm/include/asm/swab.h... Kim
On Wed, 20 Feb 2013, Kim Phillips wrote: > On Wed, 20 Feb 2013 10:43:18 -0500 > Nicolas Pitre <nico@fluxnic.net> wrote: > > > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > On Wed, 2013-02-20 at 09:06 -0500, Nicolas Pitre wrote: > > > > ... in which case there is no harm shipping a .c file and trivially > > > > enforcing -O2, the rest being equal. > > > > > > For today's compilers, unless the wind changes. > > > > We'll adapt if necessary. Going with -O2 should remain pretty safe anyway. > > Alas, not so for gcc 4.4 - I had forgotten I had tested > Ubuntu/Linaro 4.4.7-1ubuntu2 here: > > https://patchwork.kernel.org/patch/2101491/ > > add -O2 to that test script and gcc 4.4 *always* emits calls to > __bswap[sd]i2, even with -march=armv6k+. Crap. OK, assembly code is the way to go then. > I'll try working on an assembly version given it probably > makes more sense, future-gcc-immunity-wise. Agreed. Nicolas
On Wed, 20 Feb 2013 23:29:58 -0500 Nicolas Pitre <nico@fluxnic.net> wrote: > On Wed, 20 Feb 2013, Kim Phillips wrote: > > > On Wed, 20 Feb 2013 10:43:18 -0500 > > Nicolas Pitre <nico@fluxnic.net> wrote: > > > > > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > > On Wed, 2013-02-20 at 09:06 -0500, Nicolas Pitre wrote: > > > > > ... in which case there is no harm shipping a .c file and trivially > > > > > enforcing -O2, the rest being equal. > > > > > > > > For today's compilers, unless the wind changes. > > > > > > We'll adapt if necessary. Going with -O2 should remain pretty safe anyway. > > > > Alas, not so for gcc 4.4 - I had forgotten I had tested > > Ubuntu/Linaro 4.4.7-1ubuntu2 here: > > > > https://patchwork.kernel.org/patch/2101491/ > > > > add -O2 to that test script and gcc 4.4 *always* emits calls to > > __bswap[sd]i2, even with -march=armv6k+. argh, sorry - that script was testing support for __builtin_bswap{16,32,64} directly, which isn't the same as testing code generation of a byte swap pattern in C. > Crap. OK, assembly code is the way to go then. > > > I'll try working on an assembly version given it probably > > makes more sense, future-gcc-immunity-wise. > > Agreed. I'll still try the assembly approach - gcc 4.4's armv6 output looks worse than both the pre-armv6 and post-armv6 __arch_swab32 implementations currently in use: mov ip, sp push {fp, ip, lr, pc} sub fp, ip, #4 and r2, r0, #65280 ; 0xff00 lsl ip, r0, #24 orr r1, ip, r0, lsr #24 and r0, r0, #16711680 ; 0xff0000 orr r3, r1, r2, lsl #8 orr r0, r3, r0, lsr #8 ldm sp, {fp, sp, pc} Kim
On Wed, 2013-02-20 at 23:29 -0500, Nicolas Pitre wrote: > > > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > > For today's compilers, unless the wind changes. > … > Crap. OK, assembly code is the way to go then. How quickly the wind changes, these days. I blame global warming. :)
On Thu, 21 Feb 2013, Kim Phillips wrote: > On Wed, 20 Feb 2013 23:29:58 -0500 > Nicolas Pitre <nico@fluxnic.net> wrote: > > > On Wed, 20 Feb 2013, Kim Phillips wrote: > > > > > On Wed, 20 Feb 2013 10:43:18 -0500 > > > Nicolas Pitre <nico@fluxnic.net> wrote: > > > > > > > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > > > On Wed, 2013-02-20 at 09:06 -0500, Nicolas Pitre wrote: > > > > > > ... in which case there is no harm shipping a .c file and trivially > > > > > > enforcing -O2, the rest being equal. > > > > > > > > > > For today's compilers, unless the wind changes. > > > > > > > > We'll adapt if necessary. Going with -O2 should remain pretty safe anyway. > > > > > > Alas, not so for gcc 4.4 - I had forgotten I had tested > > > Ubuntu/Linaro 4.4.7-1ubuntu2 here: > > > > > > https://patchwork.kernel.org/patch/2101491/ > > > > > > add -O2 to that test script and gcc 4.4 *always* emits calls to > > > __bswap[sd]i2, even with -march=armv6k+. > > argh, sorry - that script was testing support for > __builtin_bswap{16,32,64} directly, which isn't the same as testing > code generation of a byte swap pattern in C. Still, I'm not as confident as I was about this. > I'll still try the assembly approach - gcc 4.4's armv6 output looks > worse than both the pre-armv6 and post-armv6 __arch_swab32 > implementations currently in use: > > mov ip, sp > push {fp, ip, lr, pc} > sub fp, ip, #4 You should use -fomit-frame-pointer to compile this. We don't need a frame pointer here, especially for a leaf function that the compiler decides to call on its own. > and r2, r0, #65280 ; 0xff00 > lsl ip, r0, #24 > orr r1, ip, r0, lsr #24 > and r0, r0, #16711680 ; 0xff0000 > orr r3, r1, r2, lsl #8 > orr r0, r3, r0, lsr #8 Other than that, it is true that the above is slightly suboptimal. Nicolas
On Thu, 21 Feb 2013, Woodhouse, David wrote: > On Wed, 2013-02-20 at 23:29 -0500, Nicolas Pitre wrote: > > > > On Wed, 20 Feb 2013, Woodhouse, David wrote: > > > > > For today's compilers, unless the wind changes. > > … > > Crap. OK, assembly code is the way to go then. > > How quickly the wind changes, these days. I blame global warming. :) Only fools don't believe global warming. Nicolas
On Tue, 2013-02-19 at 20:31 -0600, Kim Phillips wrote: > > > > imx_v6_v7_defconfig: 7672373 7667089 -5284 > > > lart_defconfig: 2941150 2941054 -96 > > > mxs_defconfig: 11091983 11095679 3696 > > > > The savings are good, with some impressive cases. However the > > mxs_defconfig is completely the opposite and by far. Any idea? > > Unfortunately, I don't seem to be able to reproduce this anymore. > Same linux-next, with three different compilers always produces > smaller binaries: > > text data bss dec hex filename > 5239363 280576 5569648 11089587 a936b3 linux-next-mxs-orig-gcc4.7/vmlinux > 5239169 280556 5569648 11089373 a935dd linux-next-mxs-bswap-gcc4.7/vmlinux > > 5262223 280592 5569648 11112463 a9900f linux-next-mxs-orig-gcc4.6.3/vmlinux > 5261909 280584 5569648 11112141 a98ecd linux-next-mxs-bswap-gcc4.6.3/vmlinux > > 5241379 280580 5569648 11091607 a93e97 linux-next-mxs-orig-gcc4.6ubuntu/vmlinux > 5241189 280600 5569648 11091437 a93ded linux-next-mxs-bswap-gcc4.6ubuntu/vmlinux > > So I've since made a more consistent cross-build environment, using > cross tools from Linaro [1,2] instead of via Ubuntu [3]. Andrew Pinski has done some work on GCC to support further optimisations: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55177 If you feel like building with the branch at http://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/pinskia/bytewiseunop and seeing how that affects the results, that could be interesting.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4265a26..5e8b735 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -58,6 +58,7 @@ config ARM select CLONE_BACKWARDS select OLD_SIGSUSPEND3 select OLD_SIGACTION + select ARCH_USE_BUILTIN_BSWAP help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index c9865f6..8ef97c4 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -111,12 +111,12 @@ endif targets := vmlinux vmlinux.lds \ piggy.$(suffix_y) piggy.$(suffix_y).o \ - lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \ + lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \ font.o font.c head.o misc.o $(OBJS) # Make sure files are removed during clean extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \ - lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs) + lib1funcs.S ashldi3.S bswapsdi2.o $(libfdt) $(libfdt_hdrs) ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -158,6 +158,12 @@ ashldi3 = $(obj)/ashldi3.o $(obj)/ashldi3.S: $(srctree)/arch/$(SRCARCH)/lib/ashldi3.S $(call cmd,shipped) +# For __bswapsi2, __bswapdi2 +bswapsdi2 = $(obj)/bswapsdi2.o + +$(obj)/bswapsdi2.o: $(obj)/../../../../arch/$(SRCARCH)/lib/bswapsdi2.o + $(call cmd,shipped) + # We need to prevent any GOTOFF relocs being used with references # to symbols in the .bss section since we cannot relocate them # independently from the rest at run time. This can be achieved by @@ -179,7 +185,8 @@ if [ $(words $(ZRELADDR)) -gt 1 -a "$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \ fi $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ - $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) FORCE + $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \ + $(bswapsdi2) FORCE @$(check_for_multiple_zreladdr) $(call if_changed,ld) @$(check_for_bad_syms) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 60d3b73..ba578f7 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -35,6 +35,8 @@ extern void __ucmpdi2(void); extern void __udivsi3(void); extern void __umodsi3(void); extern void __do_div64(void); +extern void __bswapsi2(void); +extern void __bswapdi2(void); extern void __aeabi_idiv(void); extern void __aeabi_idivmod(void); @@ -114,6 +116,8 @@ EXPORT_SYMBOL(__ucmpdi2); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); EXPORT_SYMBOL(__do_div64); +EXPORT_SYMBOL(__bswapsi2); +EXPORT_SYMBOL(__bswapdi2); #ifdef CONFIG_AEABI EXPORT_SYMBOL(__aeabi_idiv); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index af72969..dbee639 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -13,7 +13,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ ucmpdi2.o lib1funcs.o div64.o \ io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ - call_with_stack.o + call_with_stack.o bswapsdi2.o mmu-y := clear_user.o copy_page.o getuser.o putuser.o @@ -45,3 +45,5 @@ lib-$(CONFIG_ARCH_SHARK) += io-shark.o $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S + +CFLAGS_bswapsdi2.o = -O2 diff --git a/arch/arm/lib/bswapsdi2.c b/arch/arm/lib/bswapsdi2.c new file mode 100644 index 0000000..1923b2f --- /dev/null +++ b/arch/arm/lib/bswapsdi2.c @@ -0,0 +1,19 @@ +unsigned int __bswapsi2(unsigned int x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} + +unsigned long long __bswapdi2(unsigned long long x) +{ + return ((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56); +}