freemyipod r287 - Code Review

Jump to: navigation, search
Repository:freemyipod
Revision:r286‎ | r287 | r288 >
Date:22:21, 27 November 2010
Author:theseven
Status:new
Tags:
Comment:
UMSboot: faster memcpy/memset
Modified paths:
  • /umsboot/SOURCES (modified) (history)
  • /umsboot/arm/memcpy-arm.S (added) (history)
  • /umsboot/arm/memset-arm.S (added) (history)

Diff [purge]

Index: umsboot/arm/memcpy-arm.S
@@ -0,0 +1,97 @@
 2+/***************************************************************************
 3+ * __________ __ ___.
 4+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
 5+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
 6+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
 7+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
 8+ * \/ \/ \/ \/ \/
 9+ * $Id$
 10+ *
 11+ * Copyright (C) 2006 Free Software Foundation, Inc.
 12+ * This file was originally part of the GNU C Library
 13+ * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
 14+ * Adapted for Rockbox by Daniel Ankers
 15+ *
 16+ * This program is free software; you can redistribute it and/or
 17+ * modify it under the terms of the GNU General Public License
 18+ * as published by the Free Software Foundation; either version 2
 19+ * of the License, or (at your option) any later version.
 20+ *
 21+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 22+ * KIND, either express or implied.
 23+ *
 24+ ****************************************************************************/
 25+
 26+#define ASM_FILE
 27+#include "global.h"
 28+
 29+/* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */
 30+.macro ldmpc cond="", order="ia", regs /* function return: pop the listed registers plus the return address */
 31+#if ARM_ARCH == 4 && defined(USE_THUMB)
 32+ ldm\cond\order sp!, { \regs, lr } @ reload the return address into lr ...
 33+ bx\cond lr @ ... and return through BX
 34+#else
 35+ ldm\cond\order sp!, { \regs, pc } @ pop the return address straight into pc
 36+#endif /* ARM_ARCH == 4 && defined(USE_THUMB) */
 37+.endm
 38+.macro ldrpc cond="" /* function return: pop a single word, the return address */
 39+#if ARM_ARCH == 4 && defined(USE_THUMB)
 40+ ldr\cond lr, [sp], #4 @ post-indexed: lr = [sp], then sp += 4
 41+ bx\cond lr @ return through BX
 42+#else
 43+ ldr\cond pc, [sp], #4 @ load the popped word directly into pc
 44+#endif /* ARM_ARCH == 4 && defined(USE_THUMB) */
 45+.endm
 46+
 47+/*
 48+ * Endian independent macros for shifting bytes within registers.
 49+ */
 50+#ifndef __ARMEB__ /* __ARMEB__ undefined: presumably a little-endian build */
 51+#define pull lsr
 52+#define push lsl
 53+#else /* __ARMEB__ defined: the two shift directions are swapped */
 54+#define pull lsl
 55+#define push lsr
 56+#endif /* NOTE(review): pull/push are not referenced by the memcpy code in this file */
 57+
 58+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 59+
 60+ .section .icode,"ax",%progbits
 61+
 62+ .align 2
 63+ .global memcpy
 64+ .type memcpy,%function
 65+
 66+memcpy:
 67+ tst r0, #31
 68+ tsteq r1, #31
 69+ tsteq r2, #31
 70+ bne 2f
 71+
 72+ stmfd sp!, {r4-r8,lr}
 73+
 74+1: ldmia r1!, {r3-r8,ip,lr}
 75+ subs r2, r2, #32
 76+ stmia r0!, {r3-r8,ip,lr}
 77+ bhi 1b
 78+
 79+ ldmpc regs="r4-r8"
 80+
 81+2: tst r0, #3
 82+ tsteq r1, #3
 83+ tsteq r2, #3
 84+ bne 4f
 85+
 86+3: ldr r3, [r1], #4
 87+ subs r2, r2, #4
 88+ str r3, [r0], #4
 89+ bhi 3b
 90+
 91+ bx lr
 92+
 93+4: ldrb r3, [r1], #1
 94+ subs r2, r2, #1
 95+ strb r3, [r0], #1
 96+ bhi 4b
 97+
 98+ bx lr
Index: umsboot/arm/memset-arm.S
@@ -0,0 +1,118 @@
 2+/***************************************************************************
 3+ * __________ __ ___.
 4+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
 5+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
 6+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
 7+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
 8+ * \/ \/ \/ \/ \/
 9+ * $Id$
 10+ *
 11+ * Copyright (C) 2006 by Thom Johansen
 12+ *
 13+ * This program is free software; you can redistribute it and/or
 14+ * modify it under the terms of the GNU General Public License
 15+ * as published by the Free Software Foundation; either version 2
 16+ * of the License, or (at your option) any later version.
 17+ *
 18+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 19+ * KIND, either express or implied.
 20+ *
 21+ ****************************************************************************/
 22+
 23+#define ASM_FILE
 24+#include "global.h"
 25+
 26+/* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */
 27+.macro ldmpc cond="", order="ia", regs /* function return: pop the listed registers plus the return address */
 28+#if ARM_ARCH == 4 && defined(USE_THUMB)
 29+ ldm\cond\order sp!, { \regs, lr } @ reload the return address into lr ...
 30+ bx\cond lr @ ... and return through BX
 31+#else
 32+ ldm\cond\order sp!, { \regs, pc } @ pop the return address straight into pc
 33+#endif /* ARM_ARCH == 4 && defined(USE_THUMB) */
 34+.endm
 35+.macro ldrpc cond="" /* function return: pop a single word, the return address */
 36+#if ARM_ARCH == 4 && defined(USE_THUMB)
 37+ ldr\cond lr, [sp], #4 @ post-indexed: lr = [sp], then sp += 4
 38+ bx\cond lr @ return through BX
 39+#else
 40+ ldr\cond pc, [sp], #4 @ load the popped word directly into pc
 41+#endif /* ARM_ARCH == 4 && defined(USE_THUMB) */
 42+.endm
 43+
 44+ .section .icode,"ax",%progbits
 45+
 46+ .align 2
 47+
 48+/* The following code is based on code found in Linux kernel version 2.6.15.3
 49+ * linux/arch/arm/lib/memset.S
 50+ *
 51+ * Copyright (C) 1995-2000 Russell King
 52+ */
 53+
 54+/* This code will align a pointer for memset, if needed */
 55+1: cmp r2, #4 @ 1 do we have enough
 56+ blt 5f @ 1 bytes to align with?
 57+ cmp r3, #2 @ 1
 58+ strgtb r1, [r0, #-1]! @ 1
 59+ strgeb r1, [r0, #-1]! @ 1
 60+ strb r1, [r0, #-1]! @ 1
 61+ sub r2, r2, r3 @ 1 r2 = r2 - r3
 62+ b 2f
 63+
 64+ .global memset
 65+ .type memset,%function
 66+memset:
 67+ add r0, r0, r2 @ we'll write backwards in memory
 68+ ands r3, r0, #3 @ 1 unaligned?
 69+ bne 1b @ 1
 70+2:
 71+/*
 72+ * we know that the pointer in r0 is aligned to a word boundary.
 73+ */
 74+ orr r1, r1, r1, lsl #8
 75+ orr r1, r1, r1, lsl #16
 76+ mov r3, r1
 77+ cmp r2, #16
 78+ blt 5f
 79+/*
 80+ * We need an extra register for this loop - save the return address and
 81+ * use the LR
 82+ */
 83+ str lr, [sp, #-4]!
 84+ mov ip, r1
 85+ mov lr, r1
 86+
 87+3: subs r2, r2, #64
 88+ stmgedb r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
 89+ stmgedb r0!, {r1, r3, ip, lr}
 90+ stmgedb r0!, {r1, r3, ip, lr}
 91+ stmgedb r0!, {r1, r3, ip, lr}
 92+ bgt 3b
 93+ ldrpc cond=eq @ Now <64 bytes to go.
 94+/*
 95+ * No need to correct the count; we're only testing bits from now on
 96+ */
 97+ tst r2, #32
 98+ stmnedb r0!, {r1, r3, ip, lr}
 99+ stmnedb r0!, {r1, r3, ip, lr}
 100+ tst r2, #16
 101+ stmnedb r0!, {r1, r3, ip, lr}
 102+ ldr lr, [sp], #4
 103+
 104+5: tst r2, #8
 105+ stmnedb r0!, {r1, r3}
 106+ tst r2, #4
 107+ strne r1, [r0, #-4]!
 108+/*
 109+ * When we get here, we've got less than 4 bytes to zero. We
 110+ * may have an unaligned pointer as well.
 111+ */
 112+6: tst r2, #2
 113+ strneb r1, [r0, #-1]!
 114+ strneb r1, [r0, #-1]!
 115+ tst r2, #1
 116+ strneb r1, [r0, #-1]!
 117+ bx lr
 118+.end:
 119+ .size memset,.end-memset
Index: umsboot/SOURCES
@@ -15,6 +15,8 @@
1616 #ifdef ARM_ARCH
1717 arm/arm-support.S
1818 arm/contextswitch.S
 19+arm/memcpy-arm.S
 20+arm/memset-arm.S
1921 #endif
2022
2123 main.c
@@ -28,7 +30,5 @@
2931 ramdisk.c
3032 usb/usb.c
3133
32 -libc/memcpy.c
33 -libc/memset.c
3434 libc/sprintf.c
3535 libc/strlen.c