From dfeab0689f69c0b4bd3480ffd37a9cacc2f17d9c Mon Sep 17 00:00:00 2001 From: "Ralf S. Engelschall" Date: Mon, 21 Dec 1998 11:00:56 +0000 Subject: Import of old SSLeay release: SSLeay 0.9.1b (unreleased) --- crypto/bn/asm/mips1.s | 539 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 539 insertions(+) create mode 100644 crypto/bn/asm/mips1.s (limited to 'crypto/bn/asm/mips1.s') diff --git a/crypto/bn/asm/mips1.s b/crypto/bn/asm/mips1.s new file mode 100644 index 0000000000..44fa1254c7 --- /dev/null +++ b/crypto/bn/asm/mips1.s @@ -0,0 +1,539 @@ +/* This assember is for R2000/R3000 machines, or higher ones that do + * no want to do any 64 bit arithmatic. + * Make sure that the SSLeay bignum library is compiled with + * THIRTY_TWO_BIT set. + * This must either be compiled with the system CC, or, if you use GNU gas, + * cc -E mips1.s|gas -o mips1.o + */ + .set reorder + .set noat + +#define R1 $1 +#define CC $2 +#define R2 $3 +#define R3 $8 +#define R4 $9 +#define L1 $10 +#define L2 $11 +#define L3 $12 +#define L4 $13 +#define H1 $14 +#define H2 $15 +#define H3 $24 +#define H4 $25 + +#define P1 $4 +#define P2 $5 +#define P3 $6 +#define P4 $7 + + .align 2 + .ent bn_mul_add_words + .globl bn_mul_add_words +.text +bn_mul_add_words: + .frame $sp,0,$31 + .mask 0x00000000,0 + .fmask 0x00000000,0 + + #blt P3,4,$lab34 + + subu R1,P3,4 + move CC,$0 + bltz R1,$lab34 +$lab2: + lw R1,0(P1) + lw L1,0(P2) + lw R2,4(P1) + lw L2,4(P2) + lw R3,8(P1) + lw L3,8(P2) + lw R4,12(P1) + lw L4,12(P2) + multu L1,P4 + addu R1,R1,CC + mflo L1 + sltu CC,R1,CC + addu R1,R1,L1 + mfhi H1 + sltu L1,R1,L1 + sw R1,0(P1) + addu CC,CC,L1 + multu L2,P4 + addu CC,H1,CC + mflo L2 + addu R2,R2,CC + sltu CC,R2,CC + mfhi H2 + addu R2,R2,L2 + addu P2,P2,16 + sltu L2,R2,L2 + sw R2,4(P1) + addu CC,CC,L2 + multu L3,P4 + addu CC,H2,CC + mflo L3 + addu R3,R3,CC + sltu CC,R3,CC + mfhi H3 + addu R3,R3,L3 + addu P1,P1,16 + sltu L3,R3,L3 + sw R3,-8(P1) + addu CC,CC,L3 + multu L4,P4 + addu CC,H3,CC + mflo L4 + addu R4,R4,CC + sltu CC,R4,CC + mfhi H4 + addu R4,R4,L4 + subu P3,P3,4 + sltu L4,R4,L4 + addu CC,CC,L4 + addu CC,H4,CC + + subu R1,P3,4 + sw R4,-4(P1) # delay slot + bgez R1,$lab2 + + bleu P3,0,$lab3 + .align 2 +$lab33: + lw L1,0(P2) + lw R1,0(P1) + multu L1,P4 + addu R1,R1,CC + sltu CC,R1,CC + addu P1,P1,4 + mflo L1 + mfhi H1 + addu R1,R1,L1 + addu P2,P2,4 + sltu L1,R1,L1 + subu P3,P3,1 + addu CC,CC,L1 + sw R1,-4(P1) + addu CC,H1,CC + bgtz P3,$lab33 + j $31 + .align 2 +$lab3: + j $31 + .align 2 +$lab34: + bgt P3,0,$lab33 + j $31 + .end bn_mul_add_words + + .align 2 + # Program Unit: bn_mul_words + .ent bn_mul_words + .globl bn_mul_words +.text +bn_mul_words: + .frame $sp,0,$31 + .mask 0x00000000,0 + .fmask 0x00000000,0 + + subu P3,P3,4 + move CC,$0 + bltz P3,$lab45 +$lab44: + lw L1,0(P2) + lw L2,4(P2) + lw L3,8(P2) + lw L4,12(P2) + multu L1,P4 + subu P3,P3,4 + mflo L1 + mfhi H1 + addu L1,L1,CC + multu L2,P4 + sltu CC,L1,CC + sw L1,0(P1) + addu CC,H1,CC + mflo L2 + mfhi H2 + addu L2,L2,CC + multu L3,P4 + sltu CC,L2,CC + sw L2,4(P1) + addu CC,H2,CC + mflo L3 + mfhi H3 + addu L3,L3,CC + multu L4,P4 + sltu CC,L3,CC + sw L3,8(P1) + addu CC,H3,CC + mflo L4 + mfhi H4 + addu L4,L4,CC + addu P1,P1,16 + sltu CC,L4,CC + addu P2,P2,16 + addu CC,H4,CC + sw L4,-4(P1) + + bgez P3,$lab44 + b $lab45 +$lab46: + lw L1,0(P2) + addu P1,P1,4 + multu L1,P4 + addu P2,P2,4 + mflo L1 + mfhi H1 + addu L1,L1,CC + subu P3,P3,1 + sltu CC,L1,CC + sw L1,-4(P1) + addu CC,H1,CC + bgtz P3,$lab46 + j $31 +$lab45: + addu P3,P3,4 + bgtz P3,$lab46 + j $31 + .align 2 + .end bn_mul_words + + # Program Unit: bn_sqr_words + .ent bn_sqr_words + .globl bn_sqr_words +.text +bn_sqr_words: + .frame $sp,0,$31 + .mask 0x00000000,0 + .fmask 0x00000000,0 + + subu P3,P3,4 + bltz P3,$lab55 +$lab54: + lw L1,0(P2) + lw L2,4(P2) + lw L3,8(P2) + lw L4,12(P2) + + multu L1,L1 + subu P3,P3,4 + mflo L1 + mfhi H1 + sw L1,0(P1) + sw H1,4(P1) + + multu L2,L2 + addu P1,P1,32 + mflo L2 + mfhi H2 + sw L2,-24(P1) + sw H2,-20(P1) + + multu L3,L3 + addu P2,P2,16 + mflo L3 + mfhi H3 + sw L3,-16(P1) + sw H3,-12(P1) + + multu L4,L4 + + mflo L4 + mfhi H4 + sw L4,-8(P1) + sw H4,-4(P1) + + bgtz P3,$lab54 + b $lab55 +$lab56: + lw L1,0(P2) + addu P1,P1,8 + multu L1,L1 + addu P2,P2,4 + subu P3,P3,1 + mflo L1 + mfhi H1 + sw L1,-8(P1) + sw H1,-4(P1) + + bgtz P3,$lab56 + j $31 +$lab55: + addu P3,P3,4 + bgtz P3,$lab56 + j $31 + .align 2 + .end bn_sqr_words + + # Program Unit: bn_add_words + .ent bn_add_words + .globl bn_add_words +.text +bn_add_words: # 0x590 + .frame $sp,0,$31 + .mask 0x00000000,0 + .fmask 0x00000000,0 + + subu P4,P4,4 + move CC,$0 + bltz P4,$lab65 +$lab64: + lw L1,0(P2) + lw R1,0(P3) + lw L2,4(P2) + lw R2,4(P3) + + addu L1,L1,CC + lw L3,8(P2) + sltu CC,L1,CC + addu L1,L1,R1 + sltu R1,L1,R1 + lw R3,8(P3) + addu CC,CC,R1 + lw L4,12(P2) + + addu L2,L2,CC + lw R4,12(P3) + sltu CC,L2,CC + addu L2,L2,R2 + sltu R2,L2,R2 + sw L1,0(P1) + addu CC,CC,R2 + addu P1,P1,16 + addu L3,L3,CC + sw L2,-12(P1) + + sltu CC,L3,CC + addu L3,L3,R3 + sltu R3,L3,R3 + addu P2,P2,16 + addu CC,CC,R3 + + addu L4,L4,CC + addu P3,P3,16 + sltu CC,L4,CC + addu L4,L4,R4 + subu P4,P4,4 + sltu R4,L4,R4 + sw L3,-8(P1) + addu CC,CC,R4 + sw L4,-4(P1) + + bgtz P4,$lab64 + b $lab65 +$lab66: + lw L1,0(P2) + lw R1,0(P3) + addu L1,L1,CC + addu P1,P1,4 + sltu CC,L1,CC + addu P2,P2,4 + addu P3,P3,4 + addu L1,L1,R1 + subu P4,P4,1 + sltu R1,L1,R1 + sw L1,-4(P1) + addu CC,CC,R1 + + bgtz P4,$lab66 + j $31 +$lab65: + addu P4,P4,4 + bgtz P4,$lab66 + j $31 + .end bn_add_words + + # Program Unit: bn_div64 + .set at + .set reorder + .text + .align 2 + .globl bn_div64 + # 321 { + .ent bn_div64 2 +bn_div64: + subu $sp, 64 + sw $31, 56($sp) + sw $16, 48($sp) + .mask 0x80010000, -56 + .frame $sp, 64, $31 + move $9, $4 + move $12, $5 + move $16, $6 + # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t; + move $31, $0 + # 323 int i,count=2; + li $13, 2 + # 324 + # 325 if (d == 0) return(BN_MASK2); + bne $16, 0, $80 + li $2, -1 + b $93 +$80: + # 326 + # 327 i=BN_num_bits_word(d); + move $4, $16 + sw $31, 16($sp) + sw $9, 24($sp) + sw $12, 32($sp) + sw $13, 40($sp) + .livereg 0x800ff0e,0xfff + jal BN_num_bits_word + li $4, 32 + lw $31, 16($sp) + lw $9, 24($sp) + lw $12, 32($sp) + lw $13, 40($sp) + move $3, $2 + # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<= d) h-=d; + bltu $9, $16, $82 + subu $9, $9, $16 +$82: + # 337 + # 338 if (i) + beq $3, 0, $83 + # 339 { + # 340 d<<=i; + sll $16, $16, $3 + # 341 h=(h<>(BN_BITS2-i)); + sll $24, $9, $3 + subu $25, $4, $3 + srl $14, $12, $25 + or $9, $24, $14 + # 342 l<<=i; + sll $12, $12, $3 + # 343 } +$83: + # 344 dh=(d&BN_MASK2h)>>BN_BITS4; + # 345 dl=(d&BN_MASK2l); + and $8, $16, -65536 + srl $8, $8, 16 + and $10, $16, 65535 + li $6, -65536 +$84: + # 346 for (;;) + # 347 { + # 348 if ((h>>BN_BITS4) == dh) + srl $15, $9, 16 + bne $8, $15, $85 + # 349 q=BN_MASK2l; + li $5, 65535 + b $86 +$85: + # 350 else + # 351 q=h/dh; + divu $5, $9, $8 +$86: + # 352 + # 353 for (;;) + # 354 { + # 355 t=(h-q*dh); + mul $4, $5, $8 + subu $2, $9, $4 + move $3, $2 + # 356 if ((t&BN_MASK2h) || + # 357 ((dl*q) <= ( + # 358 (t<>BN_BITS4)))) + and $25, $2, $6 + bne $25, $0, $87 + mul $24, $10, $5 + sll $14, $3, 16 + and $15, $12, $6 + srl $25, $15, 16 + addu $15, $14, $25 + bgtu $24, $15, $88 +$87: + # 360 break; + mul $3, $10, $5 + b $89 +$88: + # 361 q--; + addu $5, $5, -1 + # 362 } + b $86 +$89: + # 363 th=q*dh; + # 364 tl=q*dl; + # 365 t=(tl>>BN_BITS4); + # 366 tl=(tl<>BN_BITS4))&BN_MASK2; + sll $24, $9, 16 + srl $15, $12, 16 + or $9, $24, $15 + # 382 l=(l&BN_MASK2l)<