freemyipod r286 - Code Review

Jump to: navigation, search
Repository:freemyipod
Revision:r285‎ | r286 | r287 >
Date:20:12, 27 November 2010
Author:theseven
Status:new
Tags:
Comment:
emBIOS: Faster memcpy/memmove/memset for ARM (cherrypicked from Rockbox)
Modified paths:
  • /embios/trunk/SOURCES (modified) (history)
  • /embios/trunk/arm/memcpy-arm.S (added) (history)
  • /embios/trunk/arm/memmove-arm.S (added) (history)
  • /embios/trunk/arm/memset-arm.S (added) (history)

Diff [purge]

Index: embios/trunk/arm/memmove-arm.S
@@ -0,0 +1,209 @@
 2+/***************************************************************************
 3+ * __________ __ ___.
 4+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
 5+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
 6+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
 7+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
 8+ * \/ \/ \/ \/ \/
 9+ * $Id$
 10+ *
 11+ * Copyright (C) 2006 Free Software Foundation, Inc.
 12+ * This file was originally part of the GNU C Library
 13+ * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
 14+ * Adapted for Rockbox by Daniel Ankers
 15+ *
 16+ * This program is free software; you can redistribute it and/or
 17+ * modify it under the terms of the GNU General Public License
 18+ * as published by the Free Software Foundation; either version 2
 19+ * of the License, or (at your option) any later version.
 20+ *
 21+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 22+ * KIND, either express or implied.
 23+ *
 24+ ****************************************************************************/
 25+
 26+#define ASM_FILE
 27+#include "global.h"
 28+
 29+/* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */
 30+.macro ldmpc cond="", order="ia", regs /* pop the listed regs plus the return address from the stack and return */
 31+#if ARM_ARCH == 4 && defined(USE_THUMB)
 32+ ldm\cond\order sp!, { \regs, lr } /* ARMv4T with Thumb: pop the return address into lr ... */
 33+ bx\cond lr /* ... and return through bx so the T bit gets switched */
 34+#else
 35+ ldm\cond\order sp!, { \regs, pc } /* otherwise pop the return address straight into pc */
 36+#endif
 37+.endm
 38+.macro ldrpc cond="" /* pop only the return address from the stack and return */
 39+#if ARM_ARCH == 4 && defined(USE_THUMB)
 40+ ldr\cond lr, [sp], #4 /* ARMv4T with Thumb: pop the return address into lr ... */
 41+ bx\cond lr /* ... and return through bx so the T bit gets switched */
 42+#else
 43+ ldr\cond pc, [sp], #4 /* otherwise pop the return address straight into pc */
 44+#endif
 45+.endm
 46+
 47+/*
 48+ * Endian independent macros for shifting bytes within registers.
 49+ */
 50+#ifndef __ARMEB__ /* __ARMEB__ not defined */
 51+#define pull lsr /* pull = logical shift right */
 52+#define push lsl /* push = logical shift left */
 53+#else /* __ARMEB__ defined: the two shift directions are swapped */
 54+#define pull lsl
 55+#define push lsr
 56+#endif
 57+
 58+ .text
 59+
 60+/*
 61+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
 62+ *
 63+ * Note:
 64+ *
 65+ * If dest is not above src, or the memory regions don't overlap, we simply
 66+ * branch to memcpy, which is normally a bit faster. Otherwise the copy is done going downwards.
 67+ */
 68+
 69+ .section .icode,"ax",%progbits
 70+
 71+ .align 2
 72+ .global memmove
 73+ .type memmove,%function
 74+
 75+memmove: /* r0 = dest, r1 = src, r2 = n; the saved r0 (dest) is restored before returning */
 76+
 77+ subs ip, r0, r1 /* ip = dest - src, flags reflect the unsigned comparison of dest and src */
 78+ cmphi r2, ip /* only when dest > src: compare n against the distance dest - src */
 79+ bls memcpy /* dest <= src, or n <= dest - src: tail-branch to memcpy */
 80+
 81+ stmfd sp!, {r0, r4, lr} /* save original dest, r4 and the return address (popped by ldmpc at label 8) */
 82+ add r1, r1, r2 /* r1 = src + n */
 83+ add r0, r0, r2 /* r0 = dest + n; everything below copies downwards from the ends */
 84+ subs r2, r2, #4 /* r2 = n - 4 */
 85+ blt 8f /* fewer than 4 bytes in total: go straight to the byte tail */
 86+ ands ip, r0, #3 /* ip = low two bits of the dest end pointer */
 87+ bne 9f /* dest end is not word aligned: copy 1-3 bytes first */
 88+ ands ip, r1, #3 /* ip = low two bits of the src end pointer */
 89+ bne 10f /* dest aligned but src is not: use the shifting copy */
 90+
 91+1: subs r2, r2, #(28) /* both pointers word aligned; r2 = remaining - 32 */
 92+ stmfd sp!, {r5 - r8} /* free up r5-r8 (restored at label 7) */
 93+ blt 5f /* fewer than 32 bytes remain: skip the block loop */
 94+
 95+2:
 96+3:
 97+4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load the 32 bytes below r1 */
 98+ subs r2, r2, #32 /* flags from here drive the bge below */
 99+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store them below r0 */
 100+ bge 3b /* repeat while at least 32 more bytes remain */
 101+
 102+5: ands ip, r2, #28 /* ip = bytes left in whole words (0, 4, ..., 28) */
 103+ rsb ip, ip, #32 /* ip = 32 - that count; used as an instruction offset (rsb leaves the flags alone) */
 104+ addne pc, pc, ip @ C is always clear here
 105+ b 7f /* reached only when no whole word remains */
 106+6: nop /* pc in the addne above reads as this address; the jump skips the loads that are not needed */
 107+ ldr r3, [r1, #-4]!
 108+ ldr r4, [r1, #-4]!
 109+ ldr r5, [r1, #-4]!
 110+ ldr r6, [r1, #-4]!
 111+ ldr r7, [r1, #-4]!
 112+ ldr r8, [r1, #-4]!
 113+ ldr lr, [r1, #-4]!
 114+
 115+ add pc, pc, ip /* same offset again: skip the stores whose registers were not loaded */
 116+ nop
 117+ nop /* pc in the add above reads as this address */
 118+ str r3, [r0, #-4]!
 119+ str r4, [r0, #-4]!
 120+ str r5, [r0, #-4]!
 121+ str r6, [r0, #-4]!
 122+ str r7, [r0, #-4]!
 123+ str r8, [r0, #-4]!
 124+ str lr, [r0, #-4]!
 125+
 126+7: ldmfd sp!, {r5 - r8} /* restore r5-r8 saved at label 1 */
 127+
 128+8: movs r2, r2, lsl #31 /* byte tail: NE if bit 0 of the count is set, carry = bit 1 of the count */
 129+ ldrneb r3, [r1, #-1]! /* one byte when bit 0 is set */
 130+ ldrcsb r4, [r1, #-1]! /* two more bytes when bit 1 is set */
 131+ ldrcsb ip, [r1, #-1]
 132+ strneb r3, [r0, #-1]!
 133+ strcsb r4, [r0, #-1]!
 134+ strcsb ip, [r0, #-1]
 135+ ldmpc regs="r0, r4" /* pop original dest into r0, restore r4 and return */
 136+
 137+9: cmp ip, #2 /* ip = dest end & 3 = bytes to copy until r0 is word aligned */
 138+ ldrgtb r3, [r1, #-1]! /* ip == 3 only */
 139+ ldrgeb r4, [r1, #-1]! /* ip >= 2 */
 140+ ldrb lr, [r1, #-1]! /* always at least one byte */
 141+ strgtb r3, [r0, #-1]!
 142+ strgeb r4, [r0, #-1]!
 143+ subs r2, r2, ip /* account for the bytes just copied */
 144+ strb lr, [r0, #-1]!
 145+ blt 8b /* fewer than 4 bytes left: finish in the byte tail */
 146+ ands ip, r1, #3 /* dest is aligned now; check src */
 147+ beq 1b /* src aligned as well: word copy */
 148+
 149+10: bic r1, r1, #3 /* round the src pointer down to a word boundary */
 150+ cmp ip, #2 /* ip = src misalignment (1, 2 or 3) selects the shift variant */
 151+ ldr r3, [r1, #0] /* preload the word r1 now points at */
 152+ beq 17f /* misaligned by 2 */
 153+ blt 18f /* misaligned by 1; a misalignment of 3 falls through to the first expansion */
 154+
 155+
 156+ .macro backward_copy_shift push pull /* downward word copy for a misaligned src; the two arguments are shift amounts in bits */
 157+
 158+ subs r2, r2, #28 /* r2 = remaining - 32 */
 159+ blt 14f /* fewer than 32 bytes remain: go to the word-at-a-time loop */
 160+
 161+11: stmfd sp!, {r5 - r9} /* free up r5-r9 for the 8-word loop */
 162+
 163+12:
 164+13: ldmdb r1!, {r7, r8, r9, ip} /* load the next four src words below r1 */
 165+ mov lr, r3, push #\push /* start the highest output word from the word loaded previously (r3) */
 166+ subs r2, r2, #32 /* flags from here drive the bge below; nothing in between sets flags */
 167+ ldmdb r1!, {r3, r4, r5, r6} /* four more src words; r3 carries over to the next round */
 168+ orr lr, lr, ip, pull #\pull /* each output word merges two adjacent src words */
 169+ mov ip, ip, push #\push
 170+ orr ip, ip, r9, pull #\pull
 171+ mov r9, r9, push #\push
 172+ orr r9, r9, r8, pull #\pull
 173+ mov r8, r8, push #\push
 174+ orr r8, r8, r7, pull #\pull
 175+ mov r7, r7, push #\push
 176+ orr r7, r7, r6, pull #\pull
 177+ mov r6, r6, push #\push
 178+ orr r6, r6, r5, pull #\pull
 179+ mov r5, r5, push #\push
 180+ orr r5, r5, r4, pull #\pull
 181+ mov r4, r4, push #\push
 182+ orr r4, r4, r3, pull #\pull
 183+ stmdb r0!, {r4 - r9, ip, lr} /* store the eight merged words below r0 */
 184+ bge 12b /* repeat while at least 32 more bytes remain */
 185+
 186+ ldmfd sp!, {r5 - r9} /* restore r5-r9 */
 187+
 188+14: ands ip, r2, #28 /* ip = bytes left in whole words */
 189+ beq 16f /* none left */
 190+
 191+15: mov lr, r3, push #\push /* one merged word per iteration */
 192+ ldr r3, [r1, #-4]!
 193+ subs ip, ip, #4
 194+ orr lr, lr, r3, pull #\pull
 195+ str lr, [r0, #-4]!
 196+ bgt 15b /* until the whole-word count is used up */
 197+
 198+16: add r1, r1, #(\pull / 8) /* turn r1 back into a byte pointer after it was rounded down at label 10 */
 199+ b 8b /* finish with the byte tail of memmove */
 200+
 201+ .endm
 202+
 203+
 204+ backward_copy_shift push=8 pull=24 /* reached by fall-through from label 10 (src misaligned by 3) */
 205+
 206+17: backward_copy_shift push=16 pull=16 /* src misaligned by 2 */
 207+
 208+18: backward_copy_shift push=24 pull=8 /* src misaligned by 1 */
 209+
 210+
Index: embios/trunk/arm/memcpy-arm.S
@@ -0,0 +1,195 @@
 2+/***************************************************************************
 3+ * __________ __ ___.
 4+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
 5+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
 6+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
 7+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
 8+ * \/ \/ \/ \/ \/
 9+ * $Id$
 10+ *
 11+ * Copyright (C) 2006 Free Software Foundation, Inc.
 12+ * This file was originally part of the GNU C Library
 13+ * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
 14+ * Adapted for Rockbox by Daniel Ankers
 15+ *
 16+ * This program is free software; you can redistribute it and/or
 17+ * modify it under the terms of the GNU General Public License
 18+ * as published by the Free Software Foundation; either version 2
 19+ * of the License, or (at your option) any later version.
 20+ *
 21+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 22+ * KIND, either express or implied.
 23+ *
 24+ ****************************************************************************/
 25+
 26+#define ASM_FILE
 27+#include "global.h"
 28+
 29+/* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */
 30+.macro ldmpc cond="", order="ia", regs /* pop the listed regs plus the return address from the stack and return */
 31+#if ARM_ARCH == 4 && defined(USE_THUMB)
 32+ ldm\cond\order sp!, { \regs, lr } /* ARMv4T with Thumb: pop the return address into lr ... */
 33+ bx\cond lr /* ... and return through bx so the T bit gets switched */
 34+#else
 35+ ldm\cond\order sp!, { \regs, pc } /* otherwise pop the return address straight into pc */
 36+#endif
 37+.endm
 38+.macro ldrpc cond="" /* pop only the return address from the stack and return */
 39+#if ARM_ARCH == 4 && defined(USE_THUMB)
 40+ ldr\cond lr, [sp], #4 /* ARMv4T with Thumb: pop the return address into lr ... */
 41+ bx\cond lr /* ... and return through bx so the T bit gets switched */
 42+#else
 43+ ldr\cond pc, [sp], #4 /* otherwise pop the return address straight into pc */
 44+#endif
 45+.endm
 46+
 47+/*
 48+ * Endian independent macros for shifting bytes within registers.
 49+ */
 50+#ifndef __ARMEB__ /* __ARMEB__ not defined */
 51+#define pull lsr /* pull = logical shift right */
 52+#define push lsl /* push = logical shift left */
 53+#else /* __ARMEB__ defined: the two shift directions are swapped */
 54+#define pull lsl
 55+#define push lsr
 56+#endif
 57+
 58+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 59+
 60+ .section .icode,"ax",%progbits
 61+
 62+ .align 2
 63+ .global memcpy
 64+ .type memcpy,%function
 65+
 66+memcpy: /* r0 = dest, r1 = src, r2 = n; copies upwards, the saved r0 (dest) is restored before returning */
 67+ stmfd sp!, {r0, r4, lr} /* save original dest, r4 and the return address (popped by ldmpc after label 8) */
 68+
 69+ subs r2, r2, #4 /* r2 = n - 4 */
 70+ blt 8f /* fewer than 4 bytes in total: go straight to the byte tail */
 71+ ands ip, r0, #3 /* ip = low two bits of dest */
 72+ bne 9f /* dest is not word aligned: copy 1-3 bytes first */
 73+ ands ip, r1, #3 /* ip = low two bits of src */
 74+ bne 10f /* dest aligned but src is not: use the shifting copy */
 75+
 76+1: subs r2, r2, #(28) /* both pointers word aligned; r2 = remaining - 32 */
 77+ stmfd sp!, {r5 - r8} /* free up r5-r8 (restored at label 7) */
 78+ blt 5f /* fewer than 32 bytes remain: skip the block loop */
 79+
 80+2:
 81+3:
 82+4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load 32 bytes from src */
 83+ subs r2, r2, #32 /* flags from here drive the bge below */
 84+ stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store them to dest */
 85+ bge 3b /* repeat while at least 32 more bytes remain */
 86+
 87+5: ands ip, r2, #28 /* ip = bytes left in whole words (0, 4, ..., 28) */
 88+ rsb ip, ip, #32 /* ip = 32 - that count; used as an instruction offset (rsb leaves the flags alone) */
 89+ addne pc, pc, ip @ C is always clear here
 90+ b 7f /* reached only when no whole word remains */
 91+6: nop /* pc in the addne above reads as this address; the jump skips the loads that are not needed */
 92+ ldr r3, [r1], #4
 93+ ldr r4, [r1], #4
 94+ ldr r5, [r1], #4
 95+ ldr r6, [r1], #4
 96+ ldr r7, [r1], #4
 97+ ldr r8, [r1], #4
 98+ ldr lr, [r1], #4
 99+
 100+ add pc, pc, ip /* same offset again: skip the stores whose registers were not loaded */
 101+ nop
 102+ nop /* pc in the add above reads as this address */
 103+ str r3, [r0], #4
 104+ str r4, [r0], #4
 105+ str r5, [r0], #4
 106+ str r6, [r0], #4
 107+ str r7, [r0], #4
 108+ str r8, [r0], #4
 109+ str lr, [r0], #4
 110+
 111+7: ldmfd sp!, {r5 - r8} /* restore r5-r8 saved at label 1 */
 112+
 113+8: movs r2, r2, lsl #31 /* byte tail: NE if bit 0 of the count is set, carry = bit 1 of the count */
 114+ ldrneb r3, [r1], #1 /* one byte when bit 0 is set */
 115+ ldrcsb r4, [r1], #1 /* two more bytes when bit 1 is set */
 116+ ldrcsb ip, [r1]
 117+ strneb r3, [r0], #1
 118+ strcsb r4, [r0], #1
 119+ strcsb ip, [r0]
 120+
 121+ ldmpc regs="r0, r4" /* pop original dest into r0, restore r4 and return */
 122+
 123+9: rsb ip, ip, #4 /* ip = 4 - (dest & 3) = bytes to copy until r0 is word aligned */
 124+ cmp ip, #2
 125+ ldrgtb r3, [r1], #1 /* ip == 3 only */
 126+ ldrgeb r4, [r1], #1 /* ip >= 2 */
 127+ ldrb lr, [r1], #1 /* always at least one byte */
 128+ strgtb r3, [r0], #1
 129+ strgeb r4, [r0], #1
 130+ subs r2, r2, ip /* account for the bytes just copied */
 131+ strb lr, [r0], #1
 132+ blt 8b /* fewer than 4 bytes left: finish in the byte tail */
 133+ ands ip, r1, #3 /* dest is aligned now; check src */
 134+ beq 1b /* src aligned as well: word copy */
 135+
 136+10: bic r1, r1, #3 /* round the src pointer down to a word boundary */
 137+ cmp ip, #2 /* ip = src misalignment (1, 2 or 3) selects the shift variant */
 138+ ldr lr, [r1], #4 /* preload the first src word and step past it */
 139+ beq 17f /* misaligned by 2 */
 140+ bgt 18f /* misaligned by 3; a misalignment of 1 falls through to the first expansion */
 141+
 142+
 143+ .macro forward_copy_shift pull push /* upward word copy for a misaligned src; the two arguments are shift amounts in bits */
 144+
 145+ subs r2, r2, #28 /* r2 = remaining - 32 */
 146+ blt 14f /* fewer than 32 bytes remain: go to the word-at-a-time loop */
 147+
 148+11: stmfd sp!, {r5 - r9} /* free up r5-r9 for the 8-word loop */
 149+
 150+12:
 151+13: ldmia r1!, {r4, r5, r6, r7} /* load the next four src words */
 152+ mov r3, lr, pull #\pull /* start the first output word from the word loaded previously (lr) */
 153+ subs r2, r2, #32 /* flags from here drive the bge below; nothing in between sets flags */
 154+ ldmia r1!, {r8, r9, ip, lr} /* four more src words; lr carries over to the next round */
 155+ orr r3, r3, r4, push #\push /* each output word merges two adjacent src words */
 156+ mov r4, r4, pull #\pull
 157+ orr r4, r4, r5, push #\push
 158+ mov r5, r5, pull #\pull
 159+ orr r5, r5, r6, push #\push
 160+ mov r6, r6, pull #\pull
 161+ orr r6, r6, r7, push #\push
 162+ mov r7, r7, pull #\pull
 163+ orr r7, r7, r8, push #\push
 164+ mov r8, r8, pull #\pull
 165+ orr r8, r8, r9, push #\push
 166+ mov r9, r9, pull #\pull
 167+ orr r9, r9, ip, push #\push
 168+ mov ip, ip, pull #\pull
 169+ orr ip, ip, lr, push #\push
 170+ stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} /* store the eight merged words */
 171+ bge 12b /* repeat while at least 32 more bytes remain */
 172+
 173+ ldmfd sp!, {r5 - r9} /* restore r5-r9 */
 174+
 175+14: ands ip, r2, #28 /* ip = bytes left in whole words */
 176+ beq 16f /* none left */
 177+
 178+15: mov r3, lr, pull #\pull /* one merged word per iteration */
 179+ ldr lr, [r1], #4
 180+ subs ip, ip, #4
 181+ orr r3, r3, lr, push #\push
 182+ str r3, [r0], #4
 183+ bgt 15b /* until the whole-word count is used up */
 184+
 185+16: sub r1, r1, #(\push / 8) /* step r1 back over the bytes of the last loaded word that were not copied yet */
 186+ b 8b /* finish with the byte tail of memcpy */
 187+
 188+ .endm
 189+
 190+
 191+ forward_copy_shift pull=8 push=24 /* reached by fall-through from label 10 (src misaligned by 1) */
 192+
 193+17: forward_copy_shift pull=16 push=16 /* src misaligned by 2 */
 194+
 195+18: forward_copy_shift pull=24 push=8 /* src misaligned by 3 */
 196+
Index: embios/trunk/arm/memset-arm.S
@@ -0,0 +1,118 @@
 2+/***************************************************************************
 3+ * __________ __ ___.
 4+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
 5+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
 6+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
 7+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
 8+ * \/ \/ \/ \/ \/
 9+ * $Id$
 10+ *
 11+ * Copyright (C) 2006 by Thom Johansen
 12+ *
 13+ * This program is free software; you can redistribute it and/or
 14+ * modify it under the terms of the GNU General Public License
 15+ * as published by the Free Software Foundation; either version 2
 16+ * of the License, or (at your option) any later version.
 17+ *
 18+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 19+ * KIND, either express or implied.
 20+ *
 21+ ****************************************************************************/
 22+
 23+#define ASM_FILE
 24+#include "global.h"
 25+
 26+/* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */
 27+.macro ldmpc cond="", order="ia", regs /* pop the listed regs plus the return address from the stack and return */
 28+#if ARM_ARCH == 4 && defined(USE_THUMB)
 29+ ldm\cond\order sp!, { \regs, lr } /* ARMv4T with Thumb: pop the return address into lr ... */
 30+ bx\cond lr /* ... and return through bx so the T bit gets switched */
 31+#else
 32+ ldm\cond\order sp!, { \regs, pc } /* otherwise pop the return address straight into pc */
 33+#endif
 34+.endm
 35+.macro ldrpc cond="" /* pop only the return address from the stack and return */
 36+#if ARM_ARCH == 4 && defined(USE_THUMB)
 37+ ldr\cond lr, [sp], #4 /* ARMv4T with Thumb: pop the return address into lr ... */
 38+ bx\cond lr /* ... and return through bx so the T bit gets switched */
 39+#else
 40+ ldr\cond pc, [sp], #4 /* otherwise pop the return address straight into pc */
 41+#endif
 42+.endm
 43+
 44+ .section .icode,"ax",%progbits
 45+
 46+ .align 2
 47+
 48+/* The following code is based on code found in Linux kernel version 2.6.15.3
 49+ * linux/arch/arm/lib/memset.S
 50+ *
 51+ * Copyright (C) 1995-2000 Russell King
 52+ */
 53+
 54+/* This code will align a pointer for memset, if needed */
 55+1: cmp r2, #4 @ 1 do we have enough
 56+ blt 5f @ 1 bytes to align with?
 57+ cmp r3, #2 @ 1
 58+ strgtb r1, [r0, #-1]! @ 1
 59+ strgeb r1, [r0, #-1]! @ 1
 60+ strb r1, [r0, #-1]! @ 1
 61+ sub r2, r2, r3 @ 1 r2 = r2 - r3
 62+ b 2f /* end pointer is word aligned now: continue with the word fill */
 63+
 64+ .global memset
 65+ .type memset,%function
 66+memset: /* r0 = start of buffer, r1 = fill value, r2 = byte count; the buffer is filled from its end downwards */
 67+ add r0, r0, r2 @ we'll write backwards in memory
 68+ ands r3, r0, #3 @ 1 unaligned?
 69+ bne 1b @ 1
 70+2:
 71+/*
 72+ * we know that the pointer in r0 is aligned to a word boundary.
 73+ */
 74+ orr r1, r1, r1, lsl #8 /* replicate the fill byte across the word; presumably expects only the low 8 bits of r1 to be set - TODO confirm */
 75+ orr r1, r1, r1, lsl #16
 76+ mov r3, r1 /* second copy of the fill word for the multi-register stores */
 77+ cmp r2, #16
 78+ blt 5f /* fewer than 16 bytes: only the 8/4/2/1 byte steps are needed */
 79+/*
 80+ * We need an extra register for this loop - save the return address and
 81+ * use the LR
 82+ */
 83+ str lr, [sp, #-4]! /* push the return address */
 84+ mov ip, r1 /* ip and lr hold the fill word too, so one stm writes 16 bytes */
 85+ mov lr, r1
 86+
 87+3: subs r2, r2, #64
 88+ stmgedb r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
 89+ stmgedb r0!, {r1, r3, ip, lr}
 90+ stmgedb r0!, {r1, r3, ip, lr}
 91+ stmgedb r0!, {r1, r3, ip, lr}
 92+ bgt 3b /* more than 64 bytes were left before this round: go again */
 93+ ldrpc cond=eq @ Now <64 bytes to go.
 94+/*
 95+ * No need to correct the count; we're only testing bits from now on
 96+ */
 97+ tst r2, #32 /* 32 bytes when bit 5 of the count is set */
 98+ stmnedb r0!, {r1, r3, ip, lr}
 99+ stmnedb r0!, {r1, r3, ip, lr}
 100+ tst r2, #16 /* 16 bytes when bit 4 is set */
 101+ stmnedb r0!, {r1, r3, ip, lr}
 102+ ldr lr, [sp], #4 /* pop the return address back into lr for the bx at the end */
 103+
 104+5: tst r2, #8 /* 8 bytes when bit 3 is set */
 105+ stmnedb r0!, {r1, r3}
 106+ tst r2, #4 /* 4 bytes when bit 2 is set */
 107+ strne r1, [r0, #-4]!
 108+/*
 109+ * When we get here, we've got less than 4 bytes to zero. We
 110+ * may have an unaligned pointer as well.
 111+ */
 112+6: tst r2, #2 /* 2 bytes when bit 1 is set */
 113+ strneb r1, [r0, #-1]!
 114+ strneb r1, [r0, #-1]!
 115+ tst r2, #1 /* 1 byte when bit 0 is set */
 116+ strneb r1, [r0, #-1]!
 117+ bx lr /* return */
 118+.end:
 119+ .size memset,.end-memset
Index: embios/trunk/SOURCES
@@ -54,6 +54,9 @@
5555 #ifdef ARM_ARCH
5656 arm/arm-support.S
5757 arm/contextswitch.S
 58+arm/memcpy-arm.S
 59+arm/memmove-arm.S
 60+arm/memset-arm.S
5861 #endif
5962
6063 init.c
@@ -96,9 +99,6 @@
97100 libc/ctype.c
98101 libc/memchr.c
99102 libc/memcmp.c
100 -libc/memcpy.c
101 -libc/memmove.c
102 -libc/memset.c
103103 libc/mktime.c
104104 libc/qsort.c
105105 libc/random.c