From 609b0852e4d50251857dbbac3141ba042e35a9ae Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 10 Oct 2016 12:01:24 -0400 Subject: Remove trailing whitespace from some files. The prevailing style seems to not have trailing whitespace, but a few lines do. This is mostly in the perlasm files, but a few C files got them after the reformat. This is the result of: find . -name '*.pl' | xargs sed -E -i '' -e 's/( |'$'\t'')*$//' find . -name '*.c' | xargs sed -E -i '' -e 's/( |'$'\t'')*$//' find . -name '*.h' | xargs sed -E -i '' -e 's/( |'$'\t'')*$//' Then bn_prime.h was excluded since this is a generated file. Note mkerr.pl has some changes in a heredoc for some help output, but other lines there lack trailing whitespace too. Reviewed-by: Kurt Roeckx Reviewed-by: Matt Caswell --- crypto/bn/asm/armv4-gf2m.pl | 2 +- crypto/bn/asm/armv4-mont.pl | 2 +- crypto/bn/asm/bn-586.pl | 24 ++-- crypto/bn/asm/co-586.pl | 12 +- crypto/bn/asm/ia64-mont.pl | 4 +- crypto/bn/asm/mips.pl | 6 +- crypto/bn/asm/parisc-mont.pl | 4 +- crypto/bn/asm/ppc-mont.pl | 6 +- crypto/bn/asm/ppc.pl | 264 +++++++++++++++++++++--------------------- crypto/bn/asm/rsaz-avx2.pl | 8 +- crypto/bn/asm/rsaz-x86_64.pl | 36 +++--- crypto/bn/asm/s390x-gf2m.pl | 2 +- crypto/bn/asm/via-mont.pl | 2 +- crypto/bn/asm/x86-mont.pl | 2 +- crypto/bn/asm/x86_64-mont5.pl | 10 +- 15 files changed, 192 insertions(+), 192 deletions(-) (limited to 'crypto/bn') diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index 0bb5433075..7a0cdb2e8a 100644 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -36,7 +36,7 @@ # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf $flavour = shift; diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl index 0dc4fe95e4..75a36f62fa 100644 --- a/crypto/bn/asm/armv4-mont.pl +++ b/crypto/bn/asm/armv4-mont.pl @@ -23,7 +23,7 @@ # [depending on key length, less for longer keys] on ARM920T, and # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code # base and compiler generated code with in-lined umull and even umlal -# instructions. The latter means that this code didn't really have an +# instructions. The latter means that this code didn't really have an # "advantage" of utilizing some "secret" instruction. 
# # The code is interoperable with Thumb ISA and is rather compact, less diff --git a/crypto/bn/asm/bn-586.pl b/crypto/bn/asm/bn-586.pl index 1ca1bbf7d4..1350bcd8fa 100644 --- a/crypto/bn/asm/bn-586.pl +++ b/crypto/bn/asm/bn-586.pl @@ -54,7 +54,7 @@ sub bn_mul_add_words &movd("mm0",&wparam(3)); # mm0 = w &pxor("mm1","mm1"); # mm1 = carry_in &jmp(&label("maw_sse2_entry")); - + &set_label("maw_sse2_unrolled",16); &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] &paddq("mm1","mm3"); # mm1 = carry_in + r[0] @@ -675,20 +675,20 @@ sub bn_sub_part_words &adc($c,0); &mov(&DWP($i*4,$r,"",0),$tmp1); # *r } - + &comment(""); &add($b,32); &add($r,32); &sub($num,8); &jnz(&label("pw_neg_loop")); - + &set_label("pw_neg_finish",0); &mov($tmp2,&wparam(4)); # get dl &mov($num,0); &sub($num,$tmp2); &and($num,7); &jz(&label("pw_end")); - + for ($i=0; $i<7; $i++) { &comment("dl<0 Tail Round $i"); @@ -705,9 +705,9 @@ sub bn_sub_part_words } &jmp(&label("pw_end")); - + &set_label("pw_pos",0); - + &and($num,0xfffffff8); # num / 8 &jz(&label("pw_pos_finish")); @@ -722,18 +722,18 @@ sub bn_sub_part_words &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &jnc(&label("pw_nc".$i)); } - + &comment(""); &add($a,32); &add($r,32); &sub($num,8); &jnz(&label("pw_pos_loop")); - + &set_label("pw_pos_finish",0); &mov($num,&wparam(4)); # get dl &and($num,7); &jz(&label("pw_end")); - + for ($i=0; $i<7; $i++) { &comment("dl>0 Tail Round $i"); @@ -754,17 +754,17 @@ sub bn_sub_part_words &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &set_label("pw_nc".$i,0); } - + &comment(""); &add($a,32); &add($r,32); &sub($num,8); &jnz(&label("pw_nc_loop")); - + &mov($num,&wparam(4)); # get dl &and($num,7); &jz(&label("pw_nc_end")); - + for ($i=0; $i<7; $i++) { &mov($tmp1,&DWP($i*4,$a,"",0)); # *a diff --git a/crypto/bn/asm/co-586.pl b/crypto/bn/asm/co-586.pl index 60d0363660..6f34c37cf8 100644 --- a/crypto/bn/asm/co-586.pl +++ b/crypto/bn/asm/co-586.pl @@ -47,7 +47,7 @@ sub mul_add_c &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b ### &adc($c2,0); - # is pos > 1, it means it is the last loop + # is pos > 1, it means it is the last loop &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a } @@ -76,7 +76,7 @@ sub sqr_add_c &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); ### &adc($c2,0); - # is pos > 1, it means it is the last loop + # is pos > 1, it means it is the last loop &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b } @@ -127,7 +127,7 @@ sub bn_mul_comba $c2="ebp"; $a="esi"; $b="edi"; - + $as=0; $ae=0; $bs=0; @@ -142,9 +142,9 @@ sub bn_mul_comba &push("ebx"); &xor($c0,$c0); - &mov("eax",&DWP(0,$a,"",0)); # load the first word + &mov("eax",&DWP(0,$a,"",0)); # load the first word &xor($c1,$c1); - &mov("edx",&DWP(0,$b,"",0)); # load the first second + &mov("edx",&DWP(0,$b,"",0)); # load the first second for ($i=0; $i<$tot; $i++) { @@ -152,7 +152,7 @@ sub bn_mul_comba $bi=$bs; $end=$be+1; - &comment("################## Calculate word $i"); + &comment("################## Calculate word $i"); for ($j=$bs; $j<$end; $j++) { diff --git a/crypto/bn/asm/ia64-mont.pl b/crypto/bn/asm/ia64-mont.pl index 5cc5c599f9..233fdd47a9 100644 --- a/crypto/bn/asm/ia64-mont.pl +++ b/crypto/bn/asm/ia64-mont.pl @@ -80,7 +80,7 @@ $code=<<___; // int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap, // const BN_ULONG *bp,const BN_ULONG *np, -// const BN_ULONG *n0p,int num); +// const BN_ULONG *n0p,int num); .align 64 .global bn_mul_mont# 
.proc bn_mul_mont# @@ -203,7 +203,7 @@ bn_mul_mont_general: { .mmi; .pred.rel "mutex",p39,p41 (p39) add topbit=r0,r0 (p41) add topbit=r0,r0,1 - nop.i 0 } + nop.i 0 } { .mmi; st8 [tp_1]=n[0] add tptr=16,sp add tp_1=8,sp };; diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index 102b656229..5093177552 100644 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -603,13 +603,13 @@ $code.=<<___; sltu $v0,$t2,$ta2 $ST $t2,-2*$BNSZ($a0) $ADDU $v0,$t8 - + $ADDU $ta3,$t3 sltu $t9,$ta3,$t3 $ADDU $t3,$ta3,$v0 sltu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - + .set noreorder bgtz $at,.L_bn_add_words_loop $ADDU $v0,$t9 @@ -808,7 +808,7 @@ bn_div_3_words: # so that we can save two arguments # and return address in registers # instead of stack:-) - + $LD $a0,($a3) move $ta2,$a1 bne $a0,$a2,bn_div_3_words_internal diff --git a/crypto/bn/asm/parisc-mont.pl b/crypto/bn/asm/parisc-mont.pl index 8aa94e8511..61c3625a3c 100644 --- a/crypto/bn/asm/parisc-mont.pl +++ b/crypto/bn/asm/parisc-mont.pl @@ -546,7 +546,7 @@ L\$copy ldd $idx($np),$hi0 std,ma %r0,8($tp) addib,<> 8,$idx,.-8 ; L\$copy - std,ma $hi0,8($rp) + std,ma $hi0,8($rp) ___ if ($BN_SZ==4) { # PA-RISC 1.1 code-path @@ -868,7 +868,7 @@ L\$copy_pa11 ldwx $idx($np),$hi0 stws,ma %r0,4($tp) addib,<> 4,$idx,L\$copy_pa11 - stws,ma $hi0,4($rp) + stws,ma $hi0,4($rp) nop ; alignment L\$done diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl index 5802260ca6..7a25b1ec9b 100644 --- a/crypto/bn/asm/ppc-mont.pl +++ b/crypto/bn/asm/ppc-mont.pl @@ -26,7 +26,7 @@ # So far RSA *sign* performance improvement over pre-bn_mul_mont asm # for 64-bit application running on PPC970/G5 is: # -# 512-bit +65% +# 512-bit +65% # 1024-bit +35% # 2048-bit +18% # 4096-bit +4% @@ -49,7 +49,7 @@ if ($flavour =~ /32/) { $UMULL= "mullw"; # unsigned multiply low $UMULH= "mulhwu"; # unsigned multiply high $UCMP= "cmplw"; # unsigned compare - $SHRI= "srwi"; # unsigned shift right by immediate + $SHRI= "srwi"; # unsigned shift right by immediate $PUSH= $ST; $POP= $LD; } elsif ($flavour =~ /64/) { @@ -69,7 +69,7 @@ if ($flavour =~ /32/) { $UMULL= "mulld"; # unsigned multiply low $UMULH= "mulhdu"; # unsigned multiply high $UCMP= "cmpld"; # unsigned compare - $SHRI= "srdi"; # unsigned shift right by immediate + $SHRI= "srdi"; # unsigned shift right by immediate $PUSH= $ST; $POP= $LD; } else { die "nonsense $flavour"; } diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl index e9262df0f3..1a03f4561e 100644 --- a/crypto/bn/asm/ppc.pl +++ b/crypto/bn/asm/ppc.pl @@ -38,7 +38,7 @@ #rsa 2048 bits 0.3036s 0.0085s 3.3 117.1 #rsa 4096 bits 2.0040s 0.0299s 0.5 33.4 #dsa 512 bits 0.0087s 0.0106s 114.3 94.5 -#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 +#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 # # Same bechmark with this assembler code: # @@ -74,7 +74,7 @@ #rsa 4096 bits 0.3700s 0.0058s 2.7 171.0 #dsa 512 bits 0.0016s 0.0020s 610.7 507.1 #dsa 1024 bits 0.0047s 0.0058s 212.5 173.2 -# +# # Again, performance increases by at about 75% # # Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code) @@ -125,7 +125,7 @@ if ($flavour =~ /32/) { $CNTLZ= "cntlzw"; # count leading zeros $SHL= "slw"; # shift left $SHR= "srw"; # unsigned shift right - $SHRI= "srwi"; # unsigned shift right by immediate + $SHRI= "srwi"; # unsigned shift right by immediate $SHLI= "slwi"; # shift left by immediate $CLRU= "clrlwi"; # clear upper bits $INSR= "insrwi"; # insert right @@ -149,10 +149,10 @@ if ($flavour =~ /32/) { $CNTLZ= "cntlzd"; # count leading zeros $SHL= "sld"; # shift left $SHR= "srd"; # unsigned shift 
right - $SHRI= "srdi"; # unsigned shift right by immediate + $SHRI= "srdi"; # unsigned shift right by immediate $SHLI= "sldi"; # shift left by immediate $CLRU= "clrldi"; # clear upper bits - $INSR= "insrdi"; # insert right + $INSR= "insrdi"; # insert right $ROTL= "rotldi"; # rotate left by immediate $TR= "td"; # conditional trap } else { die "nonsense $flavour"; } @@ -189,7 +189,7 @@ $data=<=d? blt Lppcasm_div3 #goto Lppcasm_div3 if not - subf r3,r5,r3 #h-=d ; + subf r3,r5,r3 #h-=d ; Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i cmpi 0,0,r7,0 # is (i == 0)? beq Lppcasm_div4 @@ -1668,7 +1668,7 @@ Lppcasm_div4: # as it saves registers. li r6,2 #r6=2 mtctr r6 #counter will be in count. -Lppcasm_divouterloop: +Lppcasm_divouterloop: $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4) $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 # compute here for innerloop. @@ -1676,7 +1676,7 @@ Lppcasm_divouterloop: bne Lppcasm_div5 # goto Lppcasm_div5 if not li r8,-1 - $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l + $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l b Lppcasm_div6 Lppcasm_div5: $UDIV r8,r3,r9 #q = h/dh @@ -1684,7 +1684,7 @@ Lppcasm_div6: $UMULL r12,r9,r8 #th = q*dh $CLRU r10,r5,`$BITS/2` #r10=dl $UMULL r6,r8,r10 #tl = q*dl - + Lppcasm_divinnerloop: subf r10,r12,r3 #t = h -th $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of... @@ -1761,7 +1761,7 @@ Lppcasm_div9: addi r4,r4,-$BNSZ addi r3,r3,-$BNSZ mtctr r5 -Lppcasm_sqr_mainloop: +Lppcasm_sqr_mainloop: #sqr(r[0],r[1],a[0]); $LDU r6,$BNSZ(r4) $UMULL r7,r6,r6 @@ -1769,7 +1769,7 @@ Lppcasm_sqr_mainloop: $STU r7,$BNSZ(r3) $STU r8,$BNSZ(r3) bdnz Lppcasm_sqr_mainloop -Lppcasm_sqr_adios: +Lppcasm_sqr_adios: blr .long 0 .byte 0,12,0x14,0,0,0,3,0 @@ -1783,7 +1783,7 @@ Lppcasm_sqr_adios: # done in the build # -.align 4 +.align 4 .bn_mul_words: # # BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) @@ -1797,7 +1797,7 @@ Lppcasm_sqr_adios: rlwinm. r7,r5,30,2,31 # num >> 2 beq Lppcasm_mw_REM mtctr r7 -Lppcasm_mw_LOOP: +Lppcasm_mw_LOOP: #mul(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1809,7 +1809,7 @@ Lppcasm_mw_LOOP: #using adde. $ST r9,`0*$BNSZ`(r3) #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $UMULL r11,r6,r8 $UMULH r12,r6,r8 adde r11,r11,r10 @@ -1830,7 +1830,7 @@ Lppcasm_mw_LOOP: addze r12,r12 #this spin we collect carry into #r12 $ST r11,`3*$BNSZ`(r3) - + addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` bdnz Lppcasm_mw_LOOP @@ -1846,25 +1846,25 @@ Lppcasm_mw_REM: addze r10,r10 $ST r9,`0*$BNSZ`(r3) addi r12,r10,0 - + addi r5,r5,-1 cmpli 0,0,r5,0 beq Lppcasm_mw_OVER - + #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $UMULL r9,r6,r8 $UMULH r10,r6,r8 addc r9,r9,r12 addze r10,r10 $ST r9,`1*$BNSZ`(r3) addi r12,r10,0 - + addi r5,r5,-1 cmpli 0,0,r5,0 beq Lppcasm_mw_OVER - + #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1873,8 +1873,8 @@ Lppcasm_mw_REM: addze r10,r10 $ST r9,`2*$BNSZ`(r3) addi r12,r10,0 - -Lppcasm_mw_OVER: + +Lppcasm_mw_OVER: addi r3,r12,0 blr .long 0 @@ -1902,11 +1902,11 @@ Lppcasm_mw_OVER: # empirical evidence suggests that unrolled version performs best!! # xor r0,r0,r0 #r0 = 0 - xor r12,r12,r12 #r12 = 0 . used for carry + xor r12,r12,r12 #r12 = 0 . used for carry rlwinm. 
r7,r5,30,2,31 # num >> 2 beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover mtctr r7 -Lppcasm_maw_mainloop: +Lppcasm_maw_mainloop: #mul_add(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $LD r11,`0*$BNSZ`(r3) @@ -1922,9 +1922,9 @@ Lppcasm_maw_mainloop: #by multiply and will be collected #in the next spin $ST r9,`0*$BNSZ`(r3) - + #mul_add(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $LD r9,`1*$BNSZ`(r3) $UMULL r11,r6,r8 $UMULH r12,r6,r8 @@ -1933,7 +1933,7 @@ Lppcasm_maw_mainloop: addc r11,r11,r9 #addze r12,r12 $ST r11,`1*$BNSZ`(r3) - + #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1944,7 +1944,7 @@ Lppcasm_maw_mainloop: addc r9,r9,r11 #addze r10,r10 $ST r9,`2*$BNSZ`(r3) - + #mul_add(rp[3],ap[3],w,c1); $LD r8,`3*$BNSZ`(r4) $UMULL r11,r6,r8 @@ -1958,7 +1958,7 @@ Lppcasm_maw_mainloop: addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` bdnz Lppcasm_maw_mainloop - + Lppcasm_maw_leftover: andi. r5,r5,0x3 beq Lppcasm_maw_adios @@ -1975,10 +1975,10 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - + bdz Lppcasm_maw_adios #mul_add(rp[1],ap[1],w,c1); - $LDU r8,$BNSZ(r4) + $LDU r8,$BNSZ(r4) $UMULL r9,r6,r8 $UMULH r10,r6,r8 $LDU r11,$BNSZ(r3) @@ -1987,7 +1987,7 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - + bdz Lppcasm_maw_adios #mul_add(rp[2],ap[2],w,c1); $LDU r8,$BNSZ(r4) @@ -1999,8 +1999,8 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - -Lppcasm_maw_adios: + +Lppcasm_maw_adios: addi r3,r12,0 blr .long 0 diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 0c1b236ef9..f34c84f452 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -382,7 +382,7 @@ $code.=<<___; vpaddq $TEMP1, $ACC1, $ACC1 vpmuludq 32*7-128($aap), $B2, $ACC2 vpbroadcastq 32*5-128($tpa), $B2 - vpaddq 32*11-448($tp1), $ACC2, $ACC2 + vpaddq 32*11-448($tp1), $ACC2, $ACC2 vmovdqu $ACC6, 32*6-192($tp0) vmovdqu $ACC7, 32*7-192($tp0) @@ -441,7 +441,7 @@ $code.=<<___; vmovdqu $ACC7, 32*16-448($tp1) lea 8($tp1), $tp1 - dec $i + dec $i jnz .LOOP_SQR_1024 ___ $ZERO = $ACC9; @@ -786,7 +786,7 @@ $code.=<<___; vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 vpaddq $TEMP3, $ACC7, $ACC7 vpaddq $TEMP4, $ACC8, $ACC8 - + vpsrlq \$29, $ACC4, $TEMP1 vpand $AND_MASK, $ACC4, $ACC4 vpsrlq \$29, $ACC5, $TEMP2 @@ -1451,7 +1451,7 @@ $code.=<<___; vpaddq $TEMP4, $ACC8, $ACC8 vmovdqu $ACC4, 128-128($rp) - vmovdqu $ACC5, 160-128($rp) + vmovdqu $ACC5, 160-128($rp) vmovdqu $ACC6, 192-128($rp) vmovdqu $ACC7, 224-128($rp) vmovdqu $ACC8, 256-128($rp) diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 6f3b664f7a..7bcfafe8dd 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -282,9 +282,9 @@ $code.=<<___; movq %r9, 16(%rsp) movq %r10, 24(%rsp) shrq \$63, %rbx - + #third iteration - movq 16($inp), %r9 + movq 16($inp), %r9 movq 24($inp), %rax mulq %r9 addq %rax, %r12 @@ -532,7 +532,7 @@ $code.=<<___; movl $times,128+8(%rsp) movq $out, %xmm0 # off-load movq %rbp, %xmm1 # off-load -#first iteration +#first iteration mulx %rax, %r8, %r9 mulx 16($inp), %rcx, %r10 @@ -568,7 +568,7 @@ $code.=<<___; mov %rax, (%rsp) mov %r8, 8(%rsp) -#second iteration +#second iteration mulx 16($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -607,8 +607,8 @@ $code.=<<___; mov %r9, 16(%rsp) .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp) - -#third iteration + +#third iteration .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9 adox $out, %r12 adcx %r9, %r13 
@@ -643,8 +643,8 @@ $code.=<<___; mov %r11, 32(%rsp) .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp) - -#fourth iteration + +#fourth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx adox %rax, %r14 adcx %rbx, %r15 @@ -676,8 +676,8 @@ $code.=<<___; mov %r13, 48(%rsp) mov %r14, 56(%rsp) - -#fifth iteration + +#fifth iteration .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11 adox $out, %r8 adcx %r11, %r9 @@ -704,8 +704,8 @@ $code.=<<___; mov %r15, 64(%rsp) mov %r8, 72(%rsp) - -#sixth iteration + +#sixth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -1048,7 +1048,7 @@ $code.=<<___; movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1150,7 +1150,7 @@ $code.=<<___; movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi @@ -1212,7 +1212,7 @@ $code.=<<___ if ($addx); mulx 48($ap), %rbx, %r14 adcx %rax, %r12 - + mulx 56($ap), %rax, %r15 adcx %rbx, %r13 adcx %rax, %r14 @@ -1411,7 +1411,7 @@ $code.=<<___; ___ $code.=<<___ if ($addx); jmp .Lmul_scatter_tail - + .align 32 .Lmulx_scatter: movq ($out), %rdx # pass b[0] @@ -1824,7 +1824,7 @@ __rsaz_512_mul: movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1901,7 +1901,7 @@ __rsaz_512_mul: movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi diff --git a/crypto/bn/asm/s390x-gf2m.pl b/crypto/bn/asm/s390x-gf2m.pl index cbd16f4214..57b0032d67 100644 --- a/crypto/bn/asm/s390x-gf2m.pl +++ b/crypto/bn/asm/s390x-gf2m.pl @@ -198,7 +198,7 @@ $code.=<<___; xgr $hi,@r[1] xgr $lo,@r[0] xgr $hi,@r[2] - xgr $lo,@r[3] + xgr $lo,@r[3] xgr $hi,@r[3] xgr $lo,$hi stg $hi,16($rp) diff --git a/crypto/bn/asm/via-mont.pl b/crypto/bn/asm/via-mont.pl index 9f81bc822e..558501c315 100644 --- a/crypto/bn/asm/via-mont.pl +++ b/crypto/bn/asm/via-mont.pl @@ -76,7 +76,7 @@ # dsa 1024 bits 0.001346s 0.001595s 742.7 627.0 # dsa 2048 bits 0.004745s 0.005582s 210.7 179.1 # -# Conclusions: +# Conclusions: # - VIA SDK leaves a *lot* of room for improvement (which this # implementation successfully fills:-); # - 'rep montmul' gives up to >3x performance improvement depending on diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl index 6787503666..a8b402d59b 100755 --- a/crypto/bn/asm/x86-mont.pl +++ b/crypto/bn/asm/x86-mont.pl @@ -39,7 +39,7 @@ require "x86asm.pl"; $output = pop; open STDOUT,">$output"; - + &asm_init($ARGV[0],$0); $sse2=0; diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 3278dc6056..8f49391727 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -1049,7 +1049,7 @@ my $bptr="%rdx"; # const void *table, my $nptr="%rcx"; # const BN_ULONG *nptr, my $n0 ="%r8"; # const BN_ULONG *n0); my $num ="%r9"; # int num, has to be divisible by 8 - # int pwr + # int pwr my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); my @A0=("%r10","%r11"); @@ -1126,7 +1126,7 @@ $code.=<<___; ja .Lpwr_page_walk .Lpwr_page_walk_done: - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -2036,7 +2036,7 @@ __bn_post4x_internal: jnz .Lsqr4x_sub mov $num,%r10 # prepare for back-to-back call - neg $num # restore $num + neg $num # restore $num ret .size __bn_post4x_internal,.-__bn_post4x_internal ___ @@ 
-2259,7 +2259,7 @@ bn_mulx4x_mont_gather5: mov \$0,%r10 cmovc %r10,%r11 sub %r11,%rbp -.Lmulx4xsp_done: +.Lmulx4xsp_done: and \$-64,%rbp # ensure alignment mov %rsp,%r11 sub %rbp,%r11 @@ -2741,7 +2741,7 @@ bn_powerx5: ja .Lpwrx_page_walk .Lpwrx_page_walk_done: - mov $num,%r10 + mov $num,%r10 neg $num ##############################################################