diff options
Diffstat (limited to 'crypto/cast/asm')
-rw-r--r-- | crypto/cast/asm/c-win32.asm | 940 | ||||
-rw-r--r-- | crypto/cast/asm/cast-586.pl | 167 | ||||
-rw-r--r-- | crypto/cast/asm/cx86unix.cpp | 1010 | ||||
-rw-r--r-- | crypto/cast/asm/readme | 7 |
4 files changed, 2124 insertions, 0 deletions
diff --git a/crypto/cast/asm/c-win32.asm b/crypto/cast/asm/c-win32.asm new file mode 100644 index 0000000000..a1d8a2671a --- /dev/null +++ b/crypto/cast/asm/c-win32.asm @@ -0,0 +1,940 @@
+ ; Don't even think of reading this code
+ ; It was automatically generated by cast-586.pl
+ ; Which is a perl program used to generate the x86 assembler for
+ ; any of elf, a.out, BSDI,Win32, or Solaris
+ ; eric <eay@cryptsoft.com>
+ ;
+ TITLE cast-586.asm
+ .486
+.model FLAT
+_TEXT SEGMENT
+PUBLIC _CAST_encrypt
+EXTERN _CAST_S_table0:DWORD
+EXTERN _CAST_S_table1:DWORD
+EXTERN _CAST_S_table2:DWORD
+EXTERN _CAST_S_table3:DWORD
+
+ ; _CAST_encrypt: runs rounds 0..15 over one 8-byte block, in place.
+ ; Arguments are read from the stack; the routine ends with a plain
+ ; ret, so the caller removes them.
+ ;   arg 0 - pointer to two 32-bit words; they are loaded into
+ ;           edi/esi and written back through the same pointer
+ ;           after round 15
+ ;   arg 1 - pointer to the round-key table, 8 bytes per round:
+ ;           the dword at i*8 is combined with a data word, the
+ ;           dword at i*8+4 supplies the rotate count (via cl)
+ ; Register roles: edi/esi = data words, ebp = key table,
+ ;   eax/ebx/ecx/edx = scratch.  ebp, ebx, esi and edi are saved
+ ;   and restored; eax, ecx and edx are left modified.
+_CAST_encrypt PROC NEAR
+ ;
+ push ebp
+ push ebx
+ mov ebx, DWORD PTR 12[esp] ; arg 0 (two pushes + return address = 12)
+ mov ebp, DWORD PTR 16[esp] ; arg 1
+ push esi
+ push edi
+ ; Load the 2 words
+ mov edi, DWORD PTR [ebx]
+ mov esi, DWORD PTR 4[ebx]
+ xor eax, eax
+ ; round 0
+ ; Every round follows this shape; only the add/xor/sub choices, the
+ ; key offsets and which of edi/esi is the target change.
+ mov edx, DWORD PTR [ebp] ; first key dword of the round
+ mov ecx, DWORD PTR 4[ebp] ; second key dword; cl is the rotate count
+ add edx, esi ; combine key with the data word that is not updated this round
+ rol edx, cl
+ mov ebx, edx
+ xor ecx, ecx
+ mov cl, dh ; ecx = bits 8-15 of the rotated value
+ and ebx, 255 ; ebx = bits 0-7
+ shr edx, 16
+ xor eax, eax
+ mov al, dh ; eax = bits 24-31
+ and edx, 255 ; edx = bits 16-23
+ mov ecx, DWORD PTR _CAST_S_table0[ecx*4] ; one table lookup per byte
+ mov ebx, DWORD PTR _CAST_S_table1[ebx*4]
+ xor ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table2[eax*4]
+ sub ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table3[edx*4]
+ add ecx, ebx
+ xor edi, ecx ; fold the round result into the target data word
+ ; round 1
+ mov edx, DWORD PTR 8[ebp]
+ mov ecx, DWORD PTR 12[ebp]
+ xor edx, edi
+ rol edx, cl
+ mov ebx, edx
+ xor ecx, ecx
+ mov cl, dh
+ and ebx, 255
+ shr edx, 16
+ xor eax, eax
+ mov al, dh
+ and edx, 255
+ mov ecx, DWORD PTR _CAST_S_table0[ecx*4]
+ mov ebx, DWORD PTR _CAST_S_table1[ebx*4]
+ sub ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table2[eax*4]
+ add ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table3[edx*4]
+ xor ecx, ebx
+ xor esi, ecx
+ ; round 2
+ mov edx, DWORD PTR 16[ebp]
+ mov ecx, DWORD PTR 20[ebp]
+ sub edx, esi
+ rol edx, cl
+ mov ebx, edx
+ xor ecx, ecx
+ mov cl, dh
+ and ebx, 255
+ shr edx, 16
+ xor eax, eax
+ mov al, dh
+ and edx, 255
+ mov ecx, DWORD PTR _CAST_S_table0[ecx*4]
+ mov ebx, 
DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor edi, ecx + ; round 3 + mov edx, DWORD PTR 24[ebp] + mov ecx, DWORD PTR 28[ebp] + add edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor esi, ecx + ; round 4 + mov edx, DWORD PTR 32[ebp] + mov ecx, DWORD PTR 36[ebp] + xor edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor edi, ecx + ; round 5 + mov edx, DWORD PTR 40[ebp] + mov ecx, DWORD PTR 44[ebp] + sub edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor esi, ecx + ; round 6 + mov edx, DWORD PTR 48[ebp] + mov ecx, DWORD PTR 52[ebp] + add edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor edi, ecx + ; round 7 + mov edx, DWORD PTR 
56[ebp] + mov ecx, DWORD PTR 60[ebp] + xor edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor esi, ecx + ; round 8 + mov edx, DWORD PTR 64[ebp] + mov ecx, DWORD PTR 68[ebp] + sub edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor edi, ecx + ; round 9 + mov edx, DWORD PTR 72[ebp] + mov ecx, DWORD PTR 76[ebp] + add edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor esi, ecx + ; round 10 + mov edx, DWORD PTR 80[ebp] + mov ecx, DWORD PTR 84[ebp] + xor edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor edi, ecx + ; round 11 + mov edx, DWORD PTR 88[ebp] + mov ecx, DWORD PTR 92[ebp] + sub edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR 
_CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor esi, ecx + ; round 12 + mov edx, DWORD PTR 96[ebp] + mov ecx, DWORD PTR 100[ebp] + add edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor edi, ecx + ; round 13 + mov edx, DWORD PTR 104[ebp] + mov ecx, DWORD PTR 108[ebp] + xor edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor esi, ecx + ; round 14 + mov edx, DWORD PTR 112[ebp] + mov ecx, DWORD PTR 116[ebp] + sub edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor edi, ecx + ; round 15 + mov edx, DWORD PTR 120[ebp] + mov ecx, DWORD PTR 124[ebp] + add edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + mov 
eax, DWORD PTR 20[esp] ; reload arg 0 (4 saved registers + return address = 20)
+ xor esi, ecx
+ nop
+ ; The two words are written back in swapped positions relative to
+ ; how they were loaded (edi came from offset 0, esi from offset 4).
+ mov DWORD PTR 4[eax],edi
+ mov DWORD PTR [eax],esi
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_CAST_encrypt ENDP
+_TEXT ENDS
+_TEXT SEGMENT
+PUBLIC _CAST_decrypt
+EXTERN _CAST_S_table0:DWORD
+EXTERN _CAST_S_table1:DWORD
+EXTERN _CAST_S_table2:DWORD
+EXTERN _CAST_S_table3:DWORD
+
+ ; _CAST_decrypt: same arguments, register roles and round body as
+ ; _CAST_encrypt, but the rounds run from 15 down to 0, so the key
+ ; table (arg 1, 8 bytes per round) is walked from offset 120 to 0.
+ ;   arg 0 - pointer to two 32-bit words, processed in place
+ ;   arg 1 - pointer to the round-key table
+ ; ebp, ebx, esi and edi are saved and restored; eax, ecx and edx
+ ; are left modified.
+_CAST_decrypt PROC NEAR
+ ;
+ push ebp
+ push ebx
+ mov ebx, DWORD PTR 12[esp] ; arg 0 (two pushes + return address = 12)
+ mov ebp, DWORD PTR 16[esp] ; arg 1
+ push esi
+ push edi
+ ; Load the 2 words
+ mov edi, DWORD PTR [ebx]
+ mov esi, DWORD PTR 4[ebx]
+ xor eax, eax
+ ; round 15
+ mov edx, DWORD PTR 120[ebp] ; first key dword of round 15
+ mov ecx, DWORD PTR 124[ebp] ; second key dword; cl is the rotate count
+ add edx, esi
+ rol edx, cl
+ mov ebx, edx
+ xor ecx, ecx
+ mov cl, dh ; ecx = bits 8-15 of the rotated value
+ and ebx, 255 ; ebx = bits 0-7
+ shr edx, 16
+ xor eax, eax
+ mov al, dh ; eax = bits 24-31
+ and edx, 255 ; edx = bits 16-23
+ mov ecx, DWORD PTR _CAST_S_table0[ecx*4]
+ mov ebx, DWORD PTR _CAST_S_table1[ebx*4]
+ xor ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table2[eax*4]
+ sub ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table3[edx*4]
+ add ecx, ebx
+ xor edi, ecx ; fold the round result into the target data word
+ ; round 14
+ mov edx, DWORD PTR 112[ebp]
+ mov ecx, DWORD PTR 116[ebp]
+ sub edx, edi
+ rol edx, cl
+ mov ebx, edx
+ xor ecx, ecx
+ mov cl, dh
+ and ebx, 255
+ shr edx, 16
+ xor eax, eax
+ mov al, dh
+ and edx, 255
+ mov ecx, DWORD PTR _CAST_S_table0[ecx*4]
+ mov ebx, DWORD PTR _CAST_S_table1[ebx*4]
+ add ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table2[eax*4]
+ xor ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table3[edx*4]
+ sub ecx, ebx
+ xor esi, ecx
+ ; round 13
+ mov edx, DWORD PTR 104[ebp]
+ mov ecx, DWORD PTR 108[ebp]
+ xor edx, esi
+ rol edx, cl
+ mov ebx, edx
+ xor ecx, ecx
+ mov cl, dh
+ and ebx, 255
+ shr edx, 16
+ xor eax, eax
+ mov al, dh
+ and edx, 255
+ mov ecx, DWORD PTR _CAST_S_table0[ecx*4]
+ mov ebx, DWORD PTR _CAST_S_table1[ebx*4]
+ sub ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table2[eax*4]
+ add ecx, ebx
+ mov ebx, DWORD PTR _CAST_S_table3[edx*4]
+ xor ecx, ebx
+ xor edi, ecx
+ ; round 12
+ mov edx, DWORD PTR 96[ebp]
+ mov ecx, DWORD PTR 100[ebp]
+ add edx, edi
+ rol edx, cl
+ mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor esi, ecx + ; round 11 + mov edx, DWORD PTR 88[ebp] + mov ecx, DWORD PTR 92[ebp] + sub edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor edi, ecx + ; round 10 + mov edx, DWORD PTR 80[ebp] + mov ecx, DWORD PTR 84[ebp] + xor edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor esi, ecx + ; round 9 + mov edx, DWORD PTR 72[ebp] + mov ecx, DWORD PTR 76[ebp] + add edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor edi, ecx + ; round 8 + mov edx, DWORD PTR 64[ebp] + mov ecx, DWORD PTR 68[ebp] + sub edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, 
ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor esi, ecx + ; round 7 + mov edx, DWORD PTR 56[ebp] + mov ecx, DWORD PTR 60[ebp] + xor edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor edi, ecx + ; round 6 + mov edx, DWORD PTR 48[ebp] + mov ecx, DWORD PTR 52[ebp] + add edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor esi, ecx + ; round 5 + mov edx, DWORD PTR 40[ebp] + mov ecx, DWORD PTR 44[ebp] + sub edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor edi, ecx + ; round 4 + mov edx, DWORD PTR 32[ebp] + mov ecx, DWORD PTR 36[ebp] + xor edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor esi, ecx + ; round 3 + mov edx, DWORD PTR 24[ebp] + mov ecx, DWORD PTR 28[ebp] + add edx, 
esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + xor edi, ecx + ; round 2 + mov edx, DWORD PTR 16[ebp] + mov ecx, DWORD PTR 20[ebp] + sub edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + sub ecx, ebx + xor esi, ecx + ; round 1 + mov edx, DWORD PTR 8[ebp] + mov ecx, DWORD PTR 12[ebp] + xor edx, esi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + add ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + xor ecx, ebx + xor edi, ecx + ; round 0 + mov edx, DWORD PTR [ebp] + mov ecx, DWORD PTR 4[ebp] + add edx, edi + rol edx, cl + mov ebx, edx + xor ecx, ecx + mov cl, dh + and ebx, 255 + shr edx, 16 + xor eax, eax + mov al, dh + and edx, 255 + mov ecx, DWORD PTR _CAST_S_table0[ecx*4] + mov ebx, DWORD PTR _CAST_S_table1[ebx*4] + xor ecx, ebx + mov ebx, DWORD PTR _CAST_S_table2[eax*4] + sub ecx, ebx + mov ebx, DWORD PTR _CAST_S_table3[edx*4] + add ecx, ebx + mov eax, DWORD PTR 20[esp] + xor esi, ecx + nop + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_CAST_decrypt ENDP +_TEXT ENDS +_TEXT SEGMENT +PUBLIC _CAST_cbc_encrypt + +_CAST_cbc_encrypt PROC NEAR + ; + push ebp + push ebx + push esi + push edi + mov ebp, 
DWORD PTR 28[esp] ; ebp = parameter 2, the byte count (4 saved registers + return address = 20, +8)
+ ; getting iv ptr from parameter 4
+ mov ebx, DWORD PTR 36[esp]
+ mov esi, DWORD PTR [ebx]
+ mov edi, DWORD PTR 4[ebx]
+ ; Two copies of the 8-byte IV go on the stack.  The lower copy is the
+ ; scratch block handed to the block cipher; the upper copy holds the
+ ; chaining value on the decrypt path.
+ push edi
+ push esi
+ push edi
+ push esi
+ mov ebx, esp ; ebx -> scratch block
+ mov esi, DWORD PTR 36[esp] ; esi = parameter 0, bytes are read from here
+ mov edi, DWORD PTR 40[esp] ; edi = parameter 1, bytes are written here
+ ; getting encrypt flag from parameter 5
+ mov ecx, DWORD PTR 56[esp]
+ ; get and push parameter 3
+ mov eax, DWORD PTR 48[esp]
+ push eax
+ push ebx
+ ; Stack from here on: [esp] = scratch pointer and 4[esp] = parameter 3
+ ; (the two arguments of _CAST_encrypt/_CAST_decrypt), 8..15[esp] =
+ ; scratch block, 16..23[esp] = chaining copy.
+ cmp ecx, 0
+ jz $L000decrypt
+ and ebp, 4294967288 ; keep whole 8-byte blocks only (0FFFFFFF8h)
+ mov eax, DWORD PTR 8[esp] ; eax:ebx = chaining value; mov leaves the AND flags intact
+ mov ebx, DWORD PTR 12[esp]
+ jz $L001encrypt_finish
+L002encrypt_loop:
+ mov ecx, DWORD PTR [esi]
+ mov edx, DWORD PTR 4[esi]
+ xor eax, ecx ; input block XOR previous output block
+ xor ebx, edx
+ bswap eax ; the cipher routine works on byte-swapped words
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 12[esp],ebx
+ call _CAST_encrypt
+ mov eax, DWORD PTR 8[esp]
+ mov ebx, DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR [edi],eax
+ mov DWORD PTR 4[edi],ebx
+ add esi, 8
+ add edi, 8
+ sub ebp, 8
+ jnz L002encrypt_loop
+$L001encrypt_finish:
+ mov ebp, DWORD PTR 52[esp] ; reload the byte count
+ and ebp, 7 ; 1..7 trailing bytes, or none
+ jz $L003finish
+ ; Gather the trailing bytes into edx:ecx, zero padded.  Table entry n
+ ; enters the ladder below at ej<n>, which then falls through.
+ xor ecx, ecx
+ xor edx, edx
+ mov ebp, DWORD PTR $L004cbc_enc_jmp_table[ebp*4]
+ jmp ebp
+L005ej7:
+ xor edx, edx
+ mov dh, BYTE PTR 6[esi]
+ shl edx, 8 ; byte 6 now sits in bits 16-23
+L006ej6:
+ mov dh, BYTE PTR 5[esi]
+L007ej5:
+ mov dl, BYTE PTR 4[esi]
+L008ej4:
+ mov ecx, DWORD PTR [esi]
+ jmp $L009ejend
+L010ej3:
+ ; Clear first, then load (same order as ej7).  The generated code
+ ; cleared ecx after the load, which threw away input byte 2 whenever
+ ; the tail was 3 bytes long.
+ xor ecx, ecx
+ mov ch, BYTE PTR 2[esi]
+ shl ecx, 8 ; byte 2 now sits in bits 16-23
+L011ej2:
+ mov ch, BYTE PTR 1[esi]
+L012ej1:
+ mov cl, BYTE PTR [esi]
+$L009ejend:
+ xor eax, ecx
+ xor ebx, edx
+ bswap eax
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 12[esp],ebx
+ call _CAST_encrypt
+ mov eax, DWORD PTR 8[esp]
+ mov ebx, DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR [edi],eax ; a full 8-byte block is written for the padded tail
+ mov DWORD PTR 4[edi],ebx
+ jmp $L003finish
+$L000decrypt:
+ and ebp, 4294967288 ; keep whole 8-byte blocks only (0FFFFFFF8h)
+ mov eax, DWORD PTR 16[esp] ; eax:ebx = chaining copy; mov leaves the AND flags intact
+ mov ebx, DWORD PTR 20[esp]
+ jz $L013decrypt_finish
+L014decrypt_loop:
+ mov eax, DWORD PTR [esi]
+ mov ebx, DWORD PTR 4[esi]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 
12[esp],ebx
+ call _CAST_decrypt
+ mov eax, DWORD PTR 8[esp]
+ mov ebx, DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov ecx, DWORD PTR 16[esp] ; ecx:edx = chaining value
+ mov edx, DWORD PTR 20[esp]
+ xor ecx, eax ; output block = decrypted block XOR chaining value
+ xor edx, ebx
+ mov eax, DWORD PTR [esi] ; re-read the input block before the stores below
+ mov ebx, DWORD PTR 4[esi]
+ mov DWORD PTR [edi],ecx
+ mov DWORD PTR 4[edi],edx
+ mov DWORD PTR 16[esp],eax ; this input block is the next chaining value
+ mov DWORD PTR 20[esp],ebx
+ add esi, 8
+ add edi, 8
+ sub ebp, 8
+ jnz L014decrypt_loop
+$L013decrypt_finish:
+ mov ebp, DWORD PTR 52[esp] ; reload the byte count
+ and ebp, 7 ; 1..7 trailing output bytes, or none
+ jz $L003finish
+ mov eax, DWORD PTR [esi]
+ mov ebx, DWORD PTR 4[esi]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 12[esp],ebx
+ call _CAST_decrypt ; ebp (the tail length) is preserved by the callee
+ mov eax, DWORD PTR 8[esp]
+ mov ebx, DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov ecx, DWORD PTR 16[esp]
+ mov edx, DWORD PTR 20[esp]
+ xor ecx, eax
+ xor edx, ebx
+ mov eax, DWORD PTR [esi] ; eax:ebx = value written back to the IV at finish
+ mov ebx, DWORD PTR 4[esi]
+ ; Store only the requested number of bytes.  The generated code had
+ ; no dispatch here and always fell into dj7, writing 7 bytes whatever
+ ; the tail length was; the decrypt jump table was never referenced.
+ mov ebp, DWORD PTR L023cbc_dec_jmp_table[ebp*4]
+ jmp ebp
+L015dj7:
+ ror edx, 16
+ mov BYTE PTR 6[edi],dl
+ shr edx, 16 ; dh:dl are bytes 5 and 4 again
+L016dj6:
+ mov BYTE PTR 5[edi],dh
+L017dj5:
+ mov BYTE PTR 4[edi],dl
+L018dj4:
+ mov DWORD PTR [edi],ecx
+ jmp $L019djend
+L020dj3:
+ ror ecx, 16
+ mov BYTE PTR 2[edi],cl
+ rol ecx, 16 ; rotate back so ch:cl are bytes 1 and 0 (a shift would zero them)
+L021dj2:
+ mov BYTE PTR 1[edi],ch ; output goes through edi; esi is the input pointer
+L022dj1:
+ mov BYTE PTR [edi], cl
+$L019djend:
+ jmp $L003finish
+$L003finish:
+ mov ecx, DWORD PTR 60[esp] ; parameter 4, the IV pointer
+ add esp, 24 ; drop the 2 call arguments and both IV copies
+ mov DWORD PTR [ecx],eax
+ mov DWORD PTR 4[ecx],ebx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ ; Entry n of each table is the ladder entry point for a tail of n bytes.
+$L004cbc_enc_jmp_table:
+ DD 0
+ DD L012ej1
+ DD L011ej2
+ DD L010ej3
+ DD L008ej4
+ DD L007ej5
+ DD L006ej6
+ DD L005ej7
+L023cbc_dec_jmp_table:
+ DD 0
+ DD L022dj1
+ DD L021dj2
+ DD L020dj3
+ DD L018dj4
+ DD L017dj5
+ DD L016dj6
+ DD L015dj7
+_CAST_cbc_encrypt ENDP
+_TEXT ENDS
+END
diff --git a/crypto/cast/asm/cast-586.pl b/crypto/cast/asm/cast-586.pl new file mode 100644 index 0000000000..d6b6f19bea --- /dev/null +++ b/crypto/cast/asm/cast-586.pl @@ -0,0 +1,167 @@
+#!/usr/local/bin/perl
+
+# Generator for the x86 CAST routines.  It emits CAST_encrypt and
+# CAST_decrypt itself and asks cbc.pl for CAST_cbc_encrypt; the output
+# flavour is chosen by the first command-line argument, which is passed
+# straight to asm_init.
+
+# define for pentium pro friendly version
+$ppro=1;
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+require "cbc.pl";
+
+&asm_init($ARGV[0],"cast-586.pl");
+
+# $CAST_ROUNDS is not referenced again in this file; the 16 rounds are
+# written out explicitly in CAST_encrypt below.
+$CAST_ROUNDS=16;
+# Register assignment shared by every round.
+$L="edi";
+$R="esi";
+$K="ebp";
+$tmp1="ecx";
+$tmp2="ebx";
+$tmp3="eax";
+$tmp4="edx";
+$S1="CAST_S_table0";
+$S2="CAST_S_table1";
+$S3="CAST_S_table2";
+$S4="CAST_S_table3";
+
+# Operation triples (OP1,OP2,OP3) for the three round types; see E_CAST
+# for where each operation is applied.
+@F1=("add","xor","sub");
+@F2=("xor","sub","add");
+@F3=("sub","add","xor");
+
+&CAST_encrypt("CAST_encrypt",1);
+&CAST_encrypt("CAST_decrypt",0);
+# NOTE(review): the partial-block tail of CAST_cbc_encrypt is emitted by
+# cbc.pl, which is not visible here.  The decrypt-tail corrections made
+# by hand in c-win32.asm (jump-table dispatch, rotate instead of shift,
+# stores through the output pointer) presumably have to be made there as
+# well, or regeneration will undo them - confirm against cbc.pl.
+&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1);
+
+&asm_finish();
+
+# CAST_encrypt(name, enc)
+#   Emits one complete function called $name that processes an 8-byte
+#   block in place: rounds 0..15 when $enc is true, 15..0 otherwise.
+#   The generated function takes a pointer to the two data words
+#   (wparam 0) and a pointer to the key table (wparam 1).
+sub CAST_encrypt
+ {
+ local($name,$enc)=@_;
+
+ local($win_ex)=<<"EOF";
+EXTERN _CAST_S_table0:DWORD
+EXTERN _CAST_S_table1:DWORD
+EXTERN _CAST_S_table2:DWORD
+EXTERN _CAST_S_table3:DWORD
+EOF
+ &main'external_label(
+ "CAST_S_table0",
+ "CAST_S_table1",
+ "CAST_S_table2",
+ "CAST_S_table3",
+ );
+
+ &function_begin_B($name,$win_ex);
+
+ &comment("");
+
+ &push("ebp");
+ &push("ebx");
+ &mov($tmp2,&wparam(0));
+ &mov($K,&wparam(1));
+ &push("esi");
+ &push("edi");
+
+ &comment("Load the 2 words");
+ &mov($L,&DWP(0,$tmp2,"",0));
+ &mov($R,&DWP(4,$tmp2,"",0));
+
+ &xor( $tmp3, $tmp3);
+
+ # encrypting part
+ # $S is never assigned in this file; it only fills the second
+ # argument slot of E_CAST, which E_CAST does not read.
+ # The target word alternates between $L and $R, the round type
+ # cycles F1,F2,F3 with the round number, and the final call passes
+ # a trailing 1 so E_CAST also loads the output pointer.
+
+ if ($enc)
+ {
+ &E_CAST( 0,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 1,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 2,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 3,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 4,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 5,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 6,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 7,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 8,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 9,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(10,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(11,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(12,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(13,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(14,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(15,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4,1);
+ }
+ else
+ {
+ &E_CAST(15,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(14,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(13,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(12,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(11,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST(10,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 9,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 8,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 7,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 6,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 5,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 4,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 3,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 2,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 1,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
+ &E_CAST( 0,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4,1);
+ }
+
+ # $tmp3 holds the output pointer here (loaded by the last E_CAST);
+ # the two words are stored in swapped positions.
+ &nop();
+ &mov(&DWP(4,$tmp3,"",0),$L);
+ &mov(&DWP(0,$tmp3,"",0),$R);
+ &function_end($name);
+ }
+
+# E_CAST(i, S, L, R, K, OP1, OP2, OP3, tmp1, tmp2, tmp3, tmp4 [, lst])
+#   Emits round $i.  In terms of the generated code:
+#     t   = rotl( key[$i*8] OP1 $R, low byte of key[$i*8+4] )
+#     $L ^= ((S1[bits 8-15 of t] OP2 S2[bits 0-7]) OP3 S3[bits 24-31])
+#              OP1 S4[bits 16-23]
+#   $S is accepted but not used.  When $lst is true, wparam(0) is
+#   loaded into $tmp3 just before the final xor so the caller can
+#   store the result through it.
+sub E_CAST
+ {
+ local($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4,$lst)=@_;
+ # Ri needs to have 16 pre added.
+ # NOTE(review): presumably this means the rotate value stored in the
+ # key table already includes an extra 16, which would explain the
+ # byte-to-table pairing below - confirm against the key setup code.
+
+ &comment("round $i");
+ &mov( $tmp4, &DWP($i*8,$K,"",1));
+
+ &mov( $tmp1, &DWP($i*8+4,$K,"",1));# must be word
+ &$OP1( $tmp4, $R);
+
+ &rotl( $tmp4, &LB($tmp1));
+
+ # The two branches emit the same byte extraction.  The $ppro form
+ # adds the xor instructions that clear $tmp1 and $tmp3 before their
+ # low bytes are written.
+ if ($ppro)
+ {
+ &mov( $tmp2, $tmp4); # B
+ &xor( $tmp1, $tmp1);
+
+ &movb( &LB($tmp1), &HB($tmp4)); # A
+ &and( $tmp2, 0xff);
+
+ &shr( $tmp4, 16); #
+ &xor( $tmp3, $tmp3);
+ }
+ else
+ {
+ &mov( $tmp2, $tmp4); # B
+ &movb( &LB($tmp1), &HB($tmp4)); # A # BAD BAD BAD
+
+ &shr( $tmp4, 16); #
+ &and( $tmp2, 0xff);
+ }
+
+ &movb( &LB($tmp3), &HB($tmp4)); # C # BAD BAD BAD
+ &and( $tmp4, 0xff); # D
+
+ &mov( $tmp1, &DWP($S1,"",$tmp1,4));
+ &mov( $tmp2, &DWP($S2,"",$tmp2,4));
+
+ &$OP2( $tmp1, $tmp2);
+ &mov( $tmp2, &DWP($S3,"",$tmp3,4));
+
+ &$OP3( $tmp1, $tmp2);
+ &mov( $tmp2, &DWP($S4,"",$tmp4,4));
+
+ &$OP1( $tmp1, $tmp2);
+ &mov($tmp3,&wparam(0)) if $lst;
+ # XXX
+
+ &xor( $L, $tmp1);
+ # XXX
+ }
diff --git a/crypto/cast/asm/cx86unix.cpp b/crypto/cast/asm/cx86unix.cpp new file mode 100644 index 0000000000..035692a5af --- /dev/null +++ b/crypto/cast/asm/cx86unix.cpp @@ -0,0 +1,1010 @@
+/* Run the C pre-processor over this file with one of the following defined
+ * ELF - elf object files,
+ * OUT - a.out object files,
+ * BSDI - BSDI style a.out object files
+ * SOL - Solaris style elf
+ */
+
+#define TYPE(a,b) .type a,b
+#define SIZE(a,b) .size a,b
+
+#if defined(OUT) || defined(BSDI)
+#define CAST_S_table0 _CAST_S_table0
+#define CAST_S_table1 _CAST_S_table1
+#define CAST_S_table2 _CAST_S_table2
+#define CAST_S_table3 _CAST_S_table3
+#define CAST_encrypt _CAST_encrypt
+#define CAST_S_table0 _CAST_S_table0
+#define CAST_S_table1 _CAST_S_table1
+#define CAST_S_table2 _CAST_S_table2
+#define CAST_S_table3 _CAST_S_table3
+#define CAST_decrypt _CAST_decrypt
+#define CAST_cbc_encrypt _CAST_cbc_encrypt
+
+#endif
+
+#ifdef OUT
+#define OK 1
+#define ALIGN 4
+#endif
+
+#ifdef BSDI
+#define OK 1
+#define ALIGN 4
+#undef SIZE
+#undef TYPE
+#define SIZE(a,b)
+#define TYPE(a,b)
+#endif
+
+#if defined(ELF) || 
defined(SOL)
+#define OK 1
+#define ALIGN 16
+#endif
+
+#ifndef OK
+You need to define one of
+ELF - elf systems - linux-elf, NetBSD and DG-UX
+OUT - a.out systems - linux-a.out and FreeBSD
+SOL - solaris systems, which are elf with strange comment lines
+BSDI - a.out with a very primative version of as.
+#endif
+
+/* Let the Assembler begin :-) */
+ /* Don't even think of reading this code */
+ /* It was automatically generated by cast-586.pl */
+ /* Which is a perl program used to generate the x86 assembler for */
+ /* any of elf, a.out, BSDI,Win32, or Solaris */
+ /* eric <eay@cryptsoft.com> */
+
+ .file "cast-586.s"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align ALIGN
+.globl CAST_encrypt
+ TYPE(CAST_encrypt,@function)
+ /* CAST_encrypt: runs rounds 0..15 over one 8-byte block, in place. */
+ /* Arguments are read from the stack: */
+ /*   arg 0 - pointer to two 32-bit words, loaded into %edi/%esi */
+ /*   arg 1 - pointer to the round-key table, 8 bytes per round: the */
+ /*           dword at i*8 is combined with a data word, the dword at */
+ /*           i*8+4 supplies the rotate count (via %cl) */
+ /* Register roles: %edi/%esi = data words, %ebp = key table, */
+ /*   %eax/%ebx/%ecx/%edx = scratch. */
+CAST_encrypt:
+
+ pushl %ebp
+ pushl %ebx
+ movl 12(%esp), %ebx /* arg 0 (two pushes + return address = 12) */
+ movl 16(%esp), %ebp /* arg 1 */
+ pushl %esi
+ pushl %edi
+ /* Load the 2 words */
+ movl (%ebx), %edi
+ movl 4(%ebx), %esi
+ xorl %eax, %eax
+ /* round 0 */
+ /* Every round follows this shape; only the add/xor/sub choices, the */
+ /* key offsets and which of %edi/%esi is the target change. */
+ movl (%ebp), %edx /* first key dword of the round */
+ movl 4(%ebp), %ecx /* second key dword; %cl is the rotate count */
+ addl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl /* %ecx = bits 8-15 of the rotated value */
+ andl $255, %ebx /* %ebx = bits 0-7 */
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al /* %eax = bits 24-31 */
+ andl $255, %edx /* %edx = bits 16-23 */
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ xorl %ecx, %edi /* fold the round result into the target data word */
+ /* round 1 */
+ movl 8(%ebp), %edx
+ movl 12(%ebp), %ecx
+ xorl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ xorl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 2 */
+ movl 16(%ebp), %edx
+ movl 20(%ebp), %ecx
+ subl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, 
%edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx, %ecx + xorl %ecx, %edi + /* round 3 */ + movl 24(%ebp), %edx + movl 28(%ebp), %ecx + addl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx, %ecx + xorl %ecx, %esi + /* round 4 */ + movl 32(%ebp), %edx + movl 36(%ebp), %ecx + xorl %esi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx, %ecx + xorl %ecx, %edi + /* round 5 */ + movl 40(%ebp), %edx + movl 44(%ebp), %ecx + subl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx, %ecx + xorl %ecx, %esi + /* round 6 */ + movl 48(%ebp), %edx + movl 52(%ebp), %ecx + addl %esi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx, %ecx + movl 
CAST_S_table3(,%edx,4),%ebx + addl %ebx, %ecx + xorl %ecx, %edi + /* round 7 */ + movl 56(%ebp), %edx + movl 60(%ebp), %ecx + xorl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx, %ecx + xorl %ecx, %esi + /* round 8 */ + movl 64(%ebp), %edx + movl 68(%ebp), %ecx + subl %esi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx, %ecx + xorl %ecx, %edi + /* round 9 */ + movl 72(%ebp), %edx + movl 76(%ebp), %ecx + addl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx, %ecx + xorl %ecx, %esi + /* round 10 */ + movl 80(%ebp), %edx + movl 84(%ebp), %ecx + xorl %esi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx, %ecx + xorl %ecx, %edi + /* round 11 */ + movl 88(%ebp), %edx + movl 92(%ebp), %ecx + subl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + 
andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx, %ecx + xorl %ecx, %esi + /* round 12 */ + movl 96(%ebp), %edx + movl 100(%ebp), %ecx + addl %esi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx, %ecx + xorl %ecx, %edi + /* round 13 */ + movl 104(%ebp), %edx + movl 108(%ebp), %ecx + xorl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx, %ecx + xorl %ecx, %esi + /* round 14 */ + movl 112(%ebp), %edx + movl 116(%ebp), %ecx + subl %esi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx, %ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx, %ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx, %ecx + xorl %ecx, %edi + /* round 15 */ + movl 120(%ebp), %edx + movl 124(%ebp), %ecx + addl %edi, %edx + roll %cl, %edx + movl %edx, %ebx + xorl %ecx, %ecx + movb %dh, %cl + andl $255, %ebx + shrl $16, %edx + xorl %eax, %eax + movb %dh, %al + andl $255, %edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx, %ecx + movl 
CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ movl 20(%esp), %eax
+ xorl %ecx, %esi
+ nop
+ movl %edi, 4(%eax)
+ movl %esi, (%eax)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.CAST_encrypt_end:
+ SIZE(CAST_encrypt,.CAST_encrypt_end-CAST_encrypt)
+.ident "CAST_encrypt"
+.text
+ .align ALIGN
+.globl CAST_decrypt
+ TYPE(CAST_decrypt,@function)
+CAST_decrypt: /* Runs the 16 rounds over one two-word block in place,
+ * using the round keys in descending order (15 down to 0).
+ *
+ * Stack arguments, as read below: 1st = pointer to the two 32-bit
+ * words of the block, 2nd = pointer to the round-key table.
+ *
+ * Notation used in the per-round comments, for round N:
+ *   K       word at 8*N(%ebp)
+ *   rol     left rotation by %cl, loaded from the word at 8*N+4(%ebp)
+ *   t       the rotated value left in %edx
+ *   S0..S3  entries of CAST_S_table0..3 selected by bits 8-15, 0-7,
+ *           24-31 and 16-23 of t, respectively
+ *
+ * %edi and %esi carry the two halves of the block. %ebp, %ebx, %esi
+ * and %edi are saved on entry and restored before returning.
+ */
+
+ pushl %ebp
+ pushl %ebx
+ movl 12(%esp), %ebx /* 1st argument: block pointer */
+ movl 16(%esp), %ebp /* 2nd argument: round-key table */
+ pushl %esi
+ pushl %edi
+ /* Load the 2 words: %edi = word 0, %esi = word 1 */
+ movl (%ebx), %edi
+ movl 4(%ebx), %esi
+ xorl %eax, %eax
+ /* round 15: t = rol(K + %esi); %edi ^= ((S0 ^ S1) - S2) + S3 */
+ movl 120(%ebp), %edx
+ movl 124(%ebp), %ecx
+ addl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl /* %ecx = bits 8-15 of t */
+ andl $255, %ebx /* %ebx = bits 0-7 of t */
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al /* %eax = bits 24-31 of t */
+ andl $255, %edx /* %edx = bits 16-23 of t */
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 14: t = rol(K - %edi); %esi ^= ((S0 + S1) ^ S2) - S3 */
+ movl 112(%ebp), %edx
+ movl 116(%ebp), %ecx
+ subl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ subl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 13: t = rol(K ^ %esi); %edi ^= ((S0 - S1) + S2) ^ S3 */
+ movl 104(%ebp), %edx
+ movl 108(%ebp), %ecx
+ xorl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ xorl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 12: t = rol(K + %edi); %esi ^= ((S0 ^ S1) - S2) + S3 */
+ movl 96(%ebp), %edx
+ movl 100(%ebp), %ecx
+ addl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 11: t = rol(K - %esi); %edi ^= ((S0 + S1) ^ S2) - S3 */
+ movl 88(%ebp), %edx
+ movl 92(%ebp), %ecx
+ subl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ subl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 10: t = rol(K ^ %edi); %esi ^= ((S0 - S1) + S2) ^ S3 */
+ movl 80(%ebp), %edx
+ movl 84(%ebp), %ecx
+ xorl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ xorl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 9: t = rol(K + %esi); %edi ^= ((S0 ^ S1) - S2) + S3 */
+ movl 72(%ebp), %edx
+ movl 76(%ebp), %ecx
+ addl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 8: t = rol(K - %edi); %esi ^= ((S0 + S1) ^ S2) - S3 */
+ movl 64(%ebp), %edx
+ movl 68(%ebp), %ecx
+ subl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ subl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 7: t = rol(K ^ %esi); %edi ^= ((S0 - S1) + S2) ^ S3 */
+ movl 56(%ebp), %edx
+ movl 60(%ebp), %ecx
+ xorl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ xorl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 6: t = rol(K + %edi); %esi ^= ((S0 ^ S1) - S2) + S3 */
+ movl 48(%ebp), %edx
+ movl 52(%ebp), %ecx
+ addl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 5: t = rol(K - %esi); %edi ^= ((S0 + S1) ^ S2) - S3 */
+ movl 40(%ebp), %edx
+ movl 44(%ebp), %ecx
+ subl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ subl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 4: t = rol(K ^ %edi); %esi ^= ((S0 - S1) + S2) ^ S3 */
+ movl 32(%ebp), %edx
+ movl 36(%ebp), %ecx
+ xorl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ xorl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 3: t = rol(K + %esi); %edi ^= ((S0 ^ S1) - S2) + S3 */
+ movl 24(%ebp), %edx
+ movl 28(%ebp), %ecx
+ addl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 2: t = rol(K - %edi); %esi ^= ((S0 + S1) ^ S2) - S3 */
+ movl 16(%ebp), %edx
+ movl 20(%ebp), %ecx
+ subl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ subl %ebx, %ecx
+ xorl %ecx, %esi
+ /* round 1: t = rol(K ^ %esi); %edi ^= ((S0 - S1) + S2) ^ S3 */
+ movl 8(%ebp), %edx
+ movl 12(%ebp), %ecx
+ xorl %esi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ addl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ xorl %ebx, %ecx
+ xorl %ecx, %edi
+ /* round 0: t = rol(K + %edi); %esi ^= ((S0 ^ S1) - S2) + S3 */
+ movl (%ebp), %edx
+ movl 4(%ebp), %ecx
+ addl %edi, %edx
+ roll %cl, %edx
+ movl %edx, %ebx
+ xorl %ecx, %ecx
+ movb %dh, %cl
+ andl $255, %ebx
+ shrl $16, %edx
+ xorl %eax, %eax
+ movb %dh, %al
+ andl $255, %edx
+ movl CAST_S_table0(,%ecx,4),%ecx
+ movl CAST_S_table1(,%ebx,4),%ebx
+ xorl %ebx, %ecx
+ movl CAST_S_table2(,%eax,4),%ebx
+ subl %ebx, %ecx
+ movl CAST_S_table3(,%edx,4),%ebx
+ addl %ebx, %ecx
+ movl 20(%esp), %eax /* block pointer again: the 1st argument sits at 20(%esp) after the four pushes */
+ xorl %ecx, %esi
+ nop /* NOTE(review): presumably scheduling padding emitted by the generator - confirm in cast-586.pl */
+ movl %edi, 4(%eax) /* halves are written back in swapped positions: */
+ movl %esi, (%eax) /* %edi was loaded from word 0, %esi from word 1 */
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.CAST_decrypt_end:
+ SIZE(CAST_decrypt,.CAST_decrypt_end-CAST_decrypt)
+.ident "CAST_decrypt"
+.text
+
.align ALIGN
+.globl CAST_cbc_encrypt
+ TYPE(CAST_cbc_encrypt,@function)
+CAST_cbc_encrypt:
+ /*
+  * CBC-mode driver around CAST_encrypt / CAST_decrypt.
+  *
+  * Stack arguments, in order:
+  *   1  input pointer
+  *   2  output pointer
+  *   3  length in bytes
+  *   4  key pointer, handed unchanged to the block routines
+  *   5  pointer to the 8-byte IV; rewritten on exit with the value
+  *      to chain from on the next call
+  *   6  encrypt flag; zero selects the decrypt path
+  *
+  * Frame once set-up is complete (byte offsets from %esp):
+  *    0  pointer to the work buffer at 8(%esp)  (block-routine arg 1)
+  *    4  key pointer                            (block-routine arg 2)
+  *    8  work buffer, two words
+  *   16  running IV, two words (used by the decrypt path)
+  *   24  saved %edi, %esi, %ebx, %ebp
+  *   40  return address; the six arguments follow from 44 upward
+  *
+  * Register roles in the loops: %esi = input cursor, %edi = output
+  * cursor, %ebp = bytes left in whole blocks, %eax:%ebx = chaining
+  * value.  Words are byte-swapped before and after each block call.
+  *
+  * Tail handling (length & 7 != 0):
+  *   encrypt - the remaining 1..7 input bytes are gathered zero-padded
+  *             via .L004cbc_enc_jmp_table and a full 8-byte block is
+  *             written to the output;
+  *   decrypt - a full 8-byte block is read and decrypted, and exactly
+  *             length & 7 bytes are stored via .L023cbc_dec_jmp_table.
+  */
+
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 28(%esp), %ebp /* length (argument 3) */
+ /* getting iv ptr from parameter 4 (zero-based) */
+ movl 36(%esp), %ebx
+ movl (%ebx), %esi
+ movl 4(%ebx), %edi
+ /* two copies of the IV: running IV on top, work buffer below it */
+ pushl %edi
+ pushl %esi
+ pushl %edi
+ pushl %esi
+ movl %esp, %ebx /* %ebx = address of the work buffer */
+ movl 36(%esp), %esi /* input pointer */
+ movl 40(%esp), %edi /* output pointer */
+ /* getting encrypt flag from parameter 5 (zero-based) */
+ movl 56(%esp), %ecx
+ /* get and push parameter 3 (zero-based): the key pointer */
+ movl 48(%esp), %eax
+ pushl %eax
+ pushl %ebx
+ cmpl $0, %ecx
+ jz .L000decrypt
+ andl $4294967288, %ebp /* whole-block byte count; ZF set if none */
+ movl 8(%esp), %eax
+ movl 12(%esp), %ebx
+ jz .L001encrypt_finish /* flags still from the andl: movl leaves them alone */
+.L002encrypt_loop:
+ movl (%esi), %ecx
+ movl 4(%esi), %edx
+ xorl %ecx, %eax /* chain: plaintext ^ previous ciphertext (or IV) */
+ xorl %edx, %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl %eax, 8(%esp)
+ movl %ebx, 12(%esp)
+ call CAST_encrypt
+ movl 8(%esp), %eax
+ movl 12(%esp), %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl %eax, (%edi)
+ movl %ebx, 4(%edi)
+ addl $8, %esi
+ addl $8, %edi
+ subl $8, %ebp
+ jnz .L002encrypt_loop
+.L001encrypt_finish:
+ movl 52(%esp), %ebp /* reload length to look at the tail */
+ andl $7, %ebp
+ jz .L003finish
+ xorl %ecx, %ecx /* %ecx:%edx collect the tail bytes, zero padded */
+ xorl %edx, %edx
+ movl .L004cbc_enc_jmp_table(,%ebp,4),%ebp
+ jmp *%ebp
+.L005ej7:
+ xorl %edx, %edx
+ movb 6(%esi), %dh
+ sall $8, %edx
+.L006ej6:
+ movb 5(%esi), %dh
+.L007ej5:
+ movb 4(%esi), %dl
+.L008ej4:
+ movl (%esi), %ecx
+ jmp .L009ejend
+.L010ej3:
+ /* clear before loading: byte 2 must survive until the shift below */
+ xorl %ecx, %ecx
+ movb 2(%esi), %ch
+ sall $8, %ecx
+.L011ej2:
+ movb 1(%esi), %ch
+.L012ej1:
+ movb (%esi), %cl
+.L009ejend:
+ xorl %ecx, %eax
+ xorl %edx, %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl %eax, 8(%esp)
+ movl %ebx, 12(%esp)
+ call CAST_encrypt
+ movl 8(%esp), %eax
+ movl 12(%esp), %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl %eax, (%edi)
+ movl %ebx, 4(%edi)
+ jmp .L003finish
+.align ALIGN
+.L000decrypt:
+ andl $4294967288, %ebp /* whole-block byte count; ZF set if none */
+ movl 16(%esp), %eax
+ movl 20(%esp), %ebx
+ jz .L013decrypt_finish
+.L014decrypt_loop:
+ movl (%esi), %eax
+ movl 4(%esi), %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl %eax, 8(%esp)
+ movl %ebx, 12(%esp)
+ call CAST_decrypt
+ movl 8(%esp), %eax
+ movl 12(%esp), %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl 16(%esp), %ecx
+ movl 20(%esp), %edx
+ xorl %eax, %ecx /* plaintext = decrypted block ^ running IV */
+ xorl %ebx, %edx
+ movl (%esi), %eax /* this ciphertext block becomes the next IV */
+ movl 4(%esi), %ebx
+ movl %ecx, (%edi)
+ movl %edx, 4(%edi)
+ movl %eax, 16(%esp)
+ movl %ebx, 20(%esp)
+ addl $8, %esi
+ addl $8, %edi
+ subl $8, %ebp
+ jnz .L014decrypt_loop
+.L013decrypt_finish:
+ movl 52(%esp), %ebp /* reload length to look at the tail */
+ andl $7, %ebp
+ jz .L003finish
+ movl (%esi), %eax
+ movl 4(%esi), %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl %eax, 8(%esp)
+ movl %ebx, 12(%esp)
+ call CAST_decrypt
+ movl 8(%esp), %eax
+ movl 12(%esp), %ebx
+.byte 15
+.byte 200 /* bswapl %eax */
+.byte 15
+.byte 203 /* bswapl %ebx */
+ movl 16(%esp), %ecx
+ movl 20(%esp), %edx
+ xorl %eax, %ecx
+ xorl %ebx, %edx
+ movl (%esi), %eax /* value handed back through the IV pointer */
+ movl 4(%esi), %ebx
+ /* store only the length & 7 tail bytes; %ebp is 1..7 here and is */
+ /* not needed afterwards (it is restored by the epilogue)         */
+ movl .L023cbc_dec_jmp_table(,%ebp,4),%ebp
+ jmp *%ebp
+.L015dj7:
+ rorl $16, %edx
+ movb %dl, 6(%edi)
+ shrl $16, %edx /* bring bytes 0-1 back into %dl/%dh */
+.L016dj6:
+ movb %dh, 5(%edi)
+.L017dj5:
+ movb %dl, 4(%edi)
+.L018dj4:
+ movl %ecx, (%edi)
+ jmp .L019djend
+.L020dj3:
+ rorl $16, %ecx
+ movb %cl, 2(%edi)
+ shrl $16, %ecx /* bring bytes 0-1 back into %cl/%ch, as dj7 does for %edx */
+.L021dj2:
+ movb %ch, 1(%edi) /* tail bytes go to the output, never the input */
+.L022dj1:
+ movb %cl, (%edi)
+.L019djend:
+ jmp .L003finish
+.align ALIGN
+.L003finish:
+ movl 60(%esp), %ecx /* IV pointer (argument 5) */
+ addl $24, %esp /* drop the two call arguments and both buffers */
+ movl %eax, (%ecx)
+ movl %ebx, 4(%ecx)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align ALIGN
+.L004cbc_enc_jmp_table:
+ .long 0
+ .long .L012ej1
+ .long .L011ej2
+ .long .L010ej3
+ .long .L008ej4
+ .long .L007ej5
+ .long .L006ej6
+ .long .L005ej7
+.align ALIGN
+.L023cbc_dec_jmp_table:
+ .long 0
+ .long .L022dj1
+ .long .L021dj2
+ .long .L020dj3
+ .long .L018dj4
+ .long .L017dj5
+ .long .L016dj6
+ .long .L015dj7
+.CAST_cbc_encrypt_end:
+ SIZE(CAST_cbc_encrypt,.CAST_cbc_encrypt_end-CAST_cbc_encrypt)
+.ident "desasm.pl"
diff --git a/crypto/cast/asm/readme b/crypto/cast/asm/readme
new file mode 100644
index 0000000000..fbcd76289e
--- /dev/null
+++ b/crypto/cast/asm/readme
@@ -0,0 +1,7 @@
+There is a ppro flag in cast-586 which turns on/off
+generation of Pentium Pro/II friendly code
+
+This flag makes the inner loop one cycle longer, but generates
+code that runs 30% faster on the Pentium Pro/II, while being only 7% slower
+on the Pentium. By default, this flag is on.
+ |