@@ -31,27 +31,43 @@ static inline char *strcpy(char *__dest, const char *__src)
#define __HAVE_ARCH_STRNCPY
static inline char *strncpy(char *__dest, const char *__src, size_t __n)
{
- register char *__xdest = __dest;
- unsigned long __dummy;
+	char *retval = __dest;
+	const char *__dest_end;
+	unsigned long __r0;	/* pinned to r0 by "z": cmp/eq #imm8 only takes r0 */
+	/* size_t is unsigned, so 0 is the only length with nothing to write */
if (__n == 0)
- return __xdest;
-
- __asm__ __volatile__(
- "1:\n"
- "mov.b @%1+, %2\n\t"
- "mov.b %2, @%0\n\t"
- "cmp/eq #0, %2\n\t"
- "bt/s 2f\n\t"
- " cmp/eq %5,%1\n\t"
- "bf/s 1b\n\t"
- " add #1, %0\n"
- "2:"
- : "=r" (__dest), "=r" (__src), "=&z" (__dummy)
- : "0" (__dest), "1" (__src), "r" (__src+__n)
- : "memory", "t");
-
- return __xdest;
+		return retval;
+	__dest_end = __dest + __n - 1;	/* last byte this call may write */
+	/*
+	 * Copy at most __n bytes of __src; once its NUL has been copied, the
+	 * rest of the __n-byte window is zero-filled.  No NUL is stored when
+	 * __src has __n or more bytes before its terminator.
+	 * The instruction after each bt.s/bra is a delay slot and executes
+	 * even when the branch is taken.  Labels are numeric because named
+	 * labels are emitted once per inlined copy and then clash in gas.
+	 */
+	__asm__ __volatile__ (
+		"1:\n\t"			/* copy loop */
+		"mov.b @%[src]+,%[r0_reg]\n\t"
+		"cmp/eq #0,%[r0_reg]\n\t"
+		"bt.s 2f\n\t"			/* hit NUL: go pad */
+		" cmp/eq %[dest],%[dest_end]\n\t"	/* T = at last byte */
+		"bt.s 3f\n\t"
+		" mov.b %[r0_reg],@%[dest]\n\t"
+		"bra 1b\n\t"
+		" add #1,%[dest]\n"	/* mov.b R0,@Rn+ is SH2A only */
+		"2:\n\t"			/* pad loop, r0 == 0 here */
+		"bt.s 3f\n\t"
+		" mov.b %[r0_reg],@%[dest]\n\t"
+		"add #1,%[dest]\n\t"
+		"bra 2b\n\t"
+		" cmp/eq %[dest],%[dest_end]\n"
+		"3:"
+		: [src] "+&r" (__src), [dest] "+&r" (__dest), [r0_reg] "=&z" (__r0)
+		: [dest_end] "r" (__dest_end)
+		: "t", "memory");
+	return retval;
}
#define __HAVE_ARCH_STRCMP