634 lines
13 KiB
NASM
634 lines
13 KiB
NASM
|
@ libgcc1 routines for ARM cpu.
|
||
|
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
|
||
|
|
||
|
/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
|
||
|
|
||
|
This file is free software; you can redistribute it and/or modify it
|
||
|
under the terms of the GNU General Public License as published by the
|
||
|
Free Software Foundation; either version 2, or (at your option) any
|
||
|
later version.
|
||
|
|
||
|
In addition to the permissions in the GNU General Public License, the
|
||
|
Free Software Foundation gives you unlimited permission to link the
|
||
|
compiled version of this file with other programs, and to distribute
|
||
|
those programs without any restriction coming from the use of this
|
||
|
file. (The General Public License restrictions do apply in other
|
||
|
respects; for example, they cover modification of the file, and
|
||
|
distribution when not linked into another program.)
|
||
|
|
||
|
This file is distributed in the hope that it will be useful, but
|
||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program; see the file COPYING. If not, write to
|
||
|
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||
|
Boston, MA 02111-1307, USA. */
|
||
|
|
||
|
/* As a special exception, if you link this library with other files,
|
||
|
some of which are compiled with GCC, to produce an executable,
|
||
|
this library does not by itself cause the resulting executable
|
||
|
to be covered by the GNU General Public License.
|
||
|
This exception does not however invalidate any other reasons why
|
||
|
the executable file might be covered by the GNU General Public License. */
|
||
|
|
||
|
.code 16
|
||
|
|
||
|
#define TYPE(x) .type SYM(x),function
|
||
|
#define SIZE(x) .size SYM(x), . - SYM(x)
|
||
|
|
||
|
#define RET mov pc, lr
|
||
|
|
||
|
#define SYM(x) x
|
||
|
|
||
|
work .req r4 @ XXXX is this safe ?
|
||
|
|
||
|
#ifdef L_udivsi3

/* unsigned __udivsi3 (unsigned dividend, unsigned divisor)

   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = quotient.  For a zero divisor __div0 is called and 0 is
         returned.
   Uses: r1, r2, r3 and the flags as scratch; r4 is pushed before use
         and popped before returning.

   Method: shift the divisor (and a marker bit) left until it is no
   smaller than the dividend, then do shift-and-subtract, four quotient
   bits per loop pass.  */

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3

	.text
	.globl	SYM (__udivsi3)
	TYPE (__udivsi3)
	.align	2, 0
	.thumb_func
SYM (__udivsi3):
	cmp	divisor, #0
	beq	Ludiv_by_zero

	push	{ work }
	mov	result, #0
	mov	curbit, #1		@ marks the quotient bit matching divisor
	cmp	dividend, divisor
	bcc	Ludiv_done		@ dividend < divisor, quotient stays 0

	@ work = 0x10000000.  A divisor at or above this value cannot be
	@ shifted left by four without losing bits.
	mov	work, #1
	lsl	work, #28
Ludiv_shift4:
	@ Scale up a nibble at a time (the main loop retires four bits per
	@ pass) until the divisor reaches the dividend or gets too big.
	cmp	divisor, work
	bcs	Ludiv_top
	cmp	divisor, dividend
	bcs	Ludiv_top
	lsl	divisor, #4
	lsl	curbit, #4
	b	Ludiv_shift4

Ludiv_top:
	lsl	work, #3		@ work = 0x80000000
Ludiv_shift1:
	@ Big divisors are scaled one bit at a time so the top bit is
	@ never shifted out.
	cmp	divisor, work
	bcs	Ludiv_pass
	cmp	divisor, dividend
	bcs	Ludiv_pass
	lsl	divisor, #1
	lsl	curbit, #1
	b	Ludiv_shift1

Ludiv_pass:
	@ Try divisor, divisor/2, divisor/4 and divisor/8 in turn and set
	@ the matching quotient bit for each one that fits.  On the last
	@ pass a shifted marker may be zero; the extra subtraction then
	@ spoils the remainder only, which this routine does not return.
	cmp	dividend, divisor
	bcc	Ludiv_try1
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Ludiv_try1:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Ludiv_try2
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Ludiv_try2:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Ludiv_try3
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
Ludiv_try3:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Ludiv_next
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
Ludiv_next:
	cmp	dividend, #0		@ nothing left to divide?
	beq	Ludiv_done
	lsr	curbit, #4		@ marker shifted out: all bits done
	beq	Ludiv_done
	lsr	divisor, #4
	b	Ludiv_pass

Ludiv_done:
	mov	r0, result
	pop	{ work }
	RET

Ludiv_by_zero:
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE (__udivsi3)

#endif /* L_udivsi3 */
|
||
|
|
||
|
#ifdef L_umodsi3

/* unsigned __umodsi3 (unsigned dividend, unsigned divisor)

   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = dividend modulo divisor.  For a zero divisor __div0 is
         called and 0 is returned.
   Uses: r1, r2, r3, ip and the flags as scratch; r4 is pushed before
         use and popped before returning.

   Method: same scaled shift-and-subtract loop as the division routine,
   four bit positions per pass.  On the final pass some of the trial
   subtractions may use a divisor that has had low bits shifted out;
   those are recorded and added back afterwards.

   Fix over the previous version: when the loop stopped early (the
   dividend reached zero) while the marker bit was still bit 30 or 31,
   the marks of perfectly valid subtractions landed in the top three
   bits of the record and were undone by mistake.  For example
   0xC0000000 modulo 2 came out as 0x40000000.  The fix-up stage is now
   entered only when the last pass ran with the marker in the bottom
   nibble, which is the only case in which a subtraction can have been
   overdone.  */

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3

	.text
	.globl	SYM (__umodsi3)
	TYPE (__umodsi3)
	.align	2, 0
	.thumb_func
SYM (__umodsi3):
	cmp	divisor, #0
	beq	Lumod_by_zero
	mov	curbit, #1
	cmp	dividend, divisor
	bcs	Lumod_start
	RET				@ dividend < divisor: it is the answer

Lumod_start:
	@ work = 0x10000000.  A divisor at or above this value cannot be
	@ shifted left by four without losing bits.
	push	{ work }
	mov	work, #1
	lsl	work, #28
Lumod_shift4:
	@ Scale up a nibble at a time (the main loop handles four bit
	@ positions per pass) until the divisor reaches the dividend or
	@ gets too big.
	cmp	divisor, work
	bcs	Lumod_top
	cmp	divisor, dividend
	bcs	Lumod_top
	lsl	divisor, #4
	lsl	curbit, #4
	b	Lumod_shift4

Lumod_top:
	lsl	work, #3		@ work = 0x80000000
Lumod_shift1:
	@ Big divisors are scaled one bit at a time so the top bit is
	@ never shifted out.
	cmp	divisor, work
	bcs	Lumod_pass
	cmp	divisor, dividend
	bcs	Lumod_pass
	lsl	divisor, #1
	lsl	curbit, #1
	b	Lumod_shift1

Lumod_pass:
	@ Try divisor, divisor/2, divisor/4 and divisor/8.  Each of the
	@ three shifted subtractions that happens is recorded as the
	@ marker rotated right by the same amount.  The record restarts on
	@ every pass, so only the last pass is remembered.
	mov	overdone, #0
	cmp	dividend, divisor
	bcc	Lumod_try1
	sub	dividend, dividend, divisor
Lumod_try1:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Lumod_try2
	sub	dividend, dividend, work
	mov	ip, curbit		@ ip keeps the marker while it is rotated
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lumod_try2:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Lumod_try3
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lumod_try3:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Lumod_next
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lumod_next:
	mov	ip, curbit		@ remember the marker used by this pass
	cmp	dividend, #0		@ nothing left?
	beq	Lumod_fixup
	lsr	curbit, #4		@ marker shifted out: that was the last pass
	beq	Lumod_fixup
	lsr	divisor, #4
	b	Lumod_pass

Lumod_fixup:
	@ A subtraction is overdone only when its marker wrapped round
	@ from the bottom nibble into the top three bits.  If the last
	@ pass ran with the marker above the bottom nibble (early exit),
	@ every recorded bit is a valid one and nothing must be undone.
	mov	curbit, ip
	lsr	curbit, #4
	bne	Lumod_done

	@ Keep only the wrapped marks: the top three bits of the record.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	Lumod_done		@ no fix-ups needed

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	Lumod_undo2
	lsr	work, divisor, #3
	add	dividend, dividend, work
Lumod_undo2:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	Lumod_undo1
	lsr	work, divisor, #2
	add	dividend, dividend, work
Lumod_undo1:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	Lumod_done
	lsr	work, divisor, #1
	add	dividend, dividend, work
Lumod_done:
	pop	{ work }
	RET

Lumod_by_zero:
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE (__umodsi3)

#endif /* L_umodsi3 */
|
||
|
|
||
|
#ifdef L_divsi3

/* int __divsi3 (int dividend, int divisor)

   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = quotient, negated when the two operands differ in sign.
         For a zero divisor __div0 is called and 0 is returned.
   Uses: r1, r2, r3, ip and the flags as scratch; r4 is pushed before
         use and popped before returning.

   Method: remember the sign of the result, make both operands
   non-negative, then run the same unsigned shift-and-subtract loop as
   the unsigned division routine.  */

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3

	.text
	.globl	SYM (__divsi3)
	TYPE (__divsi3)
	.align	2, 0
	.thumb_func
SYM (__divsi3):
	cmp	divisor, #0
	beq	Lsdiv_by_zero

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ top bit set when the signs differ
	mov	ip, work		@ keep the result sign for the end
	mov	result, #0
	mov	curbit, #1
	cmp	divisor, #0
	bpl	Lsdiv_abs_dividend
	neg	divisor, divisor	@ the loops below work on unsigned values
Lsdiv_abs_dividend:
	cmp	dividend, #0
	bpl	Lsdiv_compare
	neg	dividend, dividend
Lsdiv_compare:
	cmp	dividend, divisor
	bcc	Lsdiv_done		@ magnitude too small, quotient stays 0

	@ work = 0x10000000.  A divisor at or above this value cannot be
	@ shifted left by four without losing bits.
	mov	work, #1
	lsl	work, #28
Lsdiv_shift4:
	@ Scale up a nibble at a time (the main loop retires four bits per
	@ pass) until the divisor reaches the dividend or gets too big.
	cmp	divisor, work
	bcs	Lsdiv_top
	cmp	divisor, dividend
	bcs	Lsdiv_top
	lsl	divisor, #4
	lsl	curbit, #4
	b	Lsdiv_shift4

Lsdiv_top:
	@ Big divisors are scaled one bit at a time so the top bit is
	@ never shifted out.
	lsl	work, #3		@ work = 0x80000000
Lsdiv_shift1:
	cmp	divisor, work
	bcs	Lsdiv_pass
	cmp	divisor, dividend
	bcs	Lsdiv_pass
	lsl	divisor, #1
	lsl	curbit, #1
	b	Lsdiv_shift1

Lsdiv_pass:
	@ Try divisor, divisor/2, divisor/4 and divisor/8 in turn and set
	@ the matching quotient bit for each one that fits.  On the last
	@ pass a shifted marker may be zero; the extra subtraction then
	@ spoils the remainder only, which this routine does not return.
	cmp	dividend, divisor
	bcc	Lsdiv_try1
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Lsdiv_try1:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Lsdiv_try2
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Lsdiv_try2:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Lsdiv_try3
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, result, work
Lsdiv_try3:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Lsdiv_next
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, result, work
Lsdiv_next:
	cmp	dividend, #0		@ nothing left to divide?
	beq	Lsdiv_done
	lsr	curbit, #4		@ marker shifted out: all bits done
	beq	Lsdiv_done
	lsr	divisor, #4
	b	Lsdiv_pass

Lsdiv_done:
	mov	r0, result
	mov	work, ip		@ fetch the saved result sign
	cmp	work, #0
	bpl	Lsdiv_return
	neg	r0, r0
Lsdiv_return:
	pop	{ work }
	RET

Lsdiv_by_zero:
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE (__divsi3)

#endif /* L_divsi3 */
|
||
|
|
||
|
#ifdef L_modsi3

/* int __modsi3 (int dividend, int divisor)

   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = remainder, carrying the sign of the original dividend.
         For a zero divisor __div0 is called and 0 is returned.
   Uses: r1, r2, r3, ip and the flags as scratch; r4 is pushed before
         use and popped before returning.  The original dividend is
         also kept on the stack so that its sign can be applied at the
         end.

   Method: make both operands non-negative, run the scaled
   shift-and-subtract loop (four bit positions per pass), add back any
   subtraction that was overdone on the final pass, then restore the
   sign.

   Fix over the previous version: when the loop stopped early (the
   dividend reached zero) while the marker bit was still bit 30 or 31,
   the marks of perfectly valid subtractions landed in the top three
   bits of the record and were undone by mistake.  For example
   0x60000000 modulo 1 came out as 0x60000000.  The fix-up stage is now
   entered only when the last pass ran with the marker in the bottom
   nibble, which is the only case in which a subtraction can have been
   overdone.  */

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3

	.text
	.globl	SYM (__modsi3)
	TYPE (__modsi3)
	.align	2, 0
	.thumb_func
SYM (__modsi3):
	mov	curbit, #1
	cmp	divisor, #0
	beq	Lsmod_by_zero
	bpl	Lsmod_save
	neg	divisor, divisor	@ the loops below work on unsigned values
Lsmod_save:
	push	{ work }
	@ ip is needed inside the loop, so the signed dividend is kept on
	@ the stack instead.  It is pushed after the work register because
	@ it is popped first on the way out.
	push	{ dividend }
	cmp	dividend, #0
	bpl	Lsmod_compare
	neg	dividend, dividend
Lsmod_compare:
	cmp	dividend, divisor
	bcc	Lsmod_sign		@ magnitude already below the divisor

	@ work = 0x10000000.  A divisor at or above this value cannot be
	@ shifted left by four without losing bits.
	mov	work, #1
	lsl	work, #28
Lsmod_shift4:
	@ Scale up a nibble at a time (the main loop handles four bit
	@ positions per pass) until the divisor reaches the dividend or
	@ gets too big.
	cmp	divisor, work
	bcs	Lsmod_top
	cmp	divisor, dividend
	bcs	Lsmod_top
	lsl	divisor, #4
	lsl	curbit, #4
	b	Lsmod_shift4

Lsmod_top:
	lsl	work, #3		@ work = 0x80000000
Lsmod_shift1:
	@ Big divisors are scaled one bit at a time so the top bit is
	@ never shifted out.
	cmp	divisor, work
	bcs	Lsmod_pass
	cmp	divisor, dividend
	bcs	Lsmod_pass
	lsl	divisor, #1
	lsl	curbit, #1
	b	Lsmod_shift1

Lsmod_pass:
	@ Try divisor, divisor/2, divisor/4 and divisor/8.  Each of the
	@ three shifted subtractions that happens is recorded as the
	@ marker rotated right by the same amount.  The record restarts on
	@ every pass, so only the last pass is remembered.
	mov	overdone, #0
	cmp	dividend, divisor
	bcc	Lsmod_try1
	sub	dividend, dividend, divisor
Lsmod_try1:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Lsmod_try2
	sub	dividend, dividend, work
	mov	ip, curbit		@ ip keeps the marker while it is rotated
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lsmod_try2:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Lsmod_try3
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lsmod_try3:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Lsmod_next
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lsmod_next:
	mov	ip, curbit		@ remember the marker used by this pass
	cmp	dividend, #0		@ nothing left?
	beq	Lsmod_fixup
	lsr	curbit, #4		@ marker shifted out: that was the last pass
	beq	Lsmod_fixup
	lsr	divisor, #4
	b	Lsmod_pass

Lsmod_fixup:
	@ A subtraction is overdone only when its marker wrapped round
	@ from the bottom nibble into the top three bits.  If the last
	@ pass ran with the marker above the bottom nibble (early exit),
	@ every recorded bit is a valid one and nothing must be undone.
	mov	curbit, ip
	lsr	curbit, #4
	bne	Lsmod_sign

	@ Keep only the wrapped marks: the top three bits of the record.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	Lsmod_sign		@ no fix-ups needed

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	Lsmod_undo2
	lsr	work, divisor, #3
	add	dividend, dividend, work
Lsmod_undo2:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	Lsmod_undo1
	lsr	work, divisor, #2
	add	dividend, dividend, work
Lsmod_undo1:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	Lsmod_sign
	lsr	work, divisor, #1
	add	dividend, dividend, work

Lsmod_sign:
	pop	{ work }		@ the signed dividend saved on entry
	cmp	work, #0
	bpl	Lsmod_return
	neg	dividend, dividend
Lsmod_return:
	pop	{ work }		@ restore the real r4 of the caller
	RET

Lsmod_by_zero:
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE (__modsi3)

#endif /* L_modsi3 */
|
||
|
|
||
|
#ifdef L_dvmd_tls

/* void __div0 (void)

   Called by the division and modulo routines in this file when the
   divisor is zero.  This version does nothing and returns at once;
   the callers then return 0.

   The section directive below was missing here although every other
   section of this file has one, and the comment on the closing
   conditional named a different macro than the one tested above.  */

	.text
	.globl	SYM (__div0)
	TYPE (__div0)
	.align	2, 0
	.thumb_func
SYM (__div0):
	RET

	SIZE (__div0)

#endif /* L_dvmd_tls */
|
||
|
|
||
|
|
||
|
#ifdef L_call_via_rX

/* Entry points used by the Arm/Thumb interworking code.  The caller
   loads the address of the target function into a register and then
   reaches the matching label below with a BL instruction.  BL leaves
   the return address in the link register with the bottom bit set,
   and the BX found here switches to the correct mode while jumping to
   the target function.  */

	.text
	.align	2, 0

/* Emit one global Thumb entry point named after the given register:
   a BX through that register followed by a NOP.  */
.macro call_via register
	.globl	SYM (_call_via_\register)
	TYPE (_call_via_\register)
	.thumb_func
SYM (_call_via_\register):
	bx	\register
	nop

	SIZE (_call_via_\register)
.endm

	@ One entry point per register, in register-number order.
	.irp	reg, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, sp, lr
	call_via \reg
	.endr

#endif /* L_call_via_rX */
|