diff options
author | Ralf S. Engelschall <rse@openssl.org> | 1998-12-21 10:52:47 +0000 |
---|---|---|
committer | Ralf S. Engelschall <rse@openssl.org> | 1998-12-21 10:52:47 +0000 |
commit | d02b48c63a58ea4367a0e905979f140b7d090f86 (patch) | |
tree | 504f62ed3d84799f785b9cd9fab255a21b0e1b0e /crypto/bn/stuff | |
download | openssl-d02b48c63a58ea4367a0e905979f140b7d090f86.tar.gz |
Import of old SSLeay release: SSLeay 0.8.1b
Diffstat (limited to 'crypto/bn/stuff')
-rw-r--r-- | crypto/bn/stuff/bn_knuth.c | 378 | ||||
-rw-r--r-- | crypto/bn/stuff/div.c | 340 | ||||
-rw-r--r-- | crypto/bn/stuff/mont.doc | 17 | ||||
-rw-r--r-- | crypto/bn/stuff/wei_mulw.c | 410 |
4 files changed, 1145 insertions, 0 deletions
diff --git a/crypto/bn/stuff/bn_knuth.c b/crypto/bn/stuff/bn_knuth.c new file mode 100644 index 0000000000..9a3f4130ed --- /dev/null +++ b/crypto/bn/stuff/bn_knuth.c @@ -0,0 +1,378 @@ +/* crypto/bn/bn_knuth.c */ + +#include <stdio.h> +#include "cryptlib.h" +#include "bn.h" + +/* This is just a test implementation, it has not been modified for + * speed and it still has memory leaks. */ + +int BN_mask_bits(BIGNUM *a,int n); + +#undef DEBUG +#define MAIN + +/* r must be different to a and b + * Toom-Cook multiplication algorithm, taken from + * The Art Of Computer Programming, Volume 2, Donald Knuth + */ + +#define CODE1 ((BIGNUM *)0x01) +#define CODE2 ((BIGNUM *)0x02) +#define CODE3 ((BIGNUM *)0x03) +#define MAXK (30+1) + +#define C3 3 +#define C4 4 +#define C5 5 +#define C6 6 +#define C7 7 +#define C8 8 +#define C9 9 +#define C10 10 +#define DONE 11 + +int new_total=0; +int Free_total=0; +int max=0,max_total=0; + +BIGNUM *LBN_new(void ); +BIGNUM *LBN_dup(BIGNUM *a); +void LBN_free(BIGNUM *a); + +int BN_mul_knuth(w, a, b) +BIGNUM *w; +BIGNUM *a; +BIGNUM *b; + { + int ret=1; + int i,j,n,an,bn,y,z; + BIGNUM *U[MAXK],*V[MAXK],*T[MAXK]; + BIGNUM *C[(MAXK*2*3)]; + BIGNUM *W[(MAXK*2)],*t1,*t2,*t3,*t4; + int Utos,Vtos,Ctos,Wtos,Ttos; + unsigned int k,Q,R; + unsigned int q[MAXK]; + unsigned int r[MAXK]; + int state; + + /* C1 */ + Utos=Vtos=Ctos=Wtos=Ttos=0; + k=1; + q[0]=q[1]=64; + r[0]=r[1]=4; + Q=6; + R=2; + + if (!bn_expand(w,BN_BITS2*2)) goto err; + an=BN_num_bits(a); + bn=BN_num_bits(b); + n=(an > bn)?an:bn; + while ((q[k-1]+q[k]) < n) + { + k++; + Q+=R; + i=R+1; + if ((i*i) <= Q) R=i; + q[k]=(1<<Q); + r[k]=(1<<R); + } +#ifdef DEBUG + printf("k ="); + for (i=0; i<=k; i++) printf("%7d",i); + printf("\nq[k]="); + for (i=0; i<=k; i++) printf("%7d",q[i]); + printf("\nr[k]="); + for (i=0; i<=k; i++) printf("%7d",r[i]); + printf("\n"); +#endif + + /* C2 */ + C[Ctos++]=CODE1; + if ((t1=LBN_dup(a)) == NULL) goto err; + C[Ctos++]=t1; + if ((t1=LBN_dup(b)) == NULL) goto err; + C[Ctos++]=t1; + + state=C3; + for (;;) + { +#ifdef DEBUG + printf("state=C%d, Ctos=%d Wtos=%d\n",state,Ctos,Wtos); +#endif + switch (state) + { + int lr,lq,lp; + case C3: + k--; + if (k == 0) + { + t1=C[--Ctos]; + t2=C[--Ctos]; +#ifdef DEBUG + printf("Ctos=%d poped %d\n",Ctos,2); +#endif + if ((t2->top == 0) || (t1->top == 0)) + w->top=0; + else + BN_mul(w,t1,t2); + + LBN_free(t1); /* FREE */ + LBN_free(t2); /* FREE */ + state=C10; + } + else + { + lr=r[k]; + lq=q[k]; + lp=q[k-1]+q[k]; + state=C4; + } + break; + case C4: + for (z=0; z<2; z++) /* do for u and v */ + { + /* break the item at C[Ctos-1] + * into lr+1 parts of lq bits each + * for j=0; j<=2r; j++ + */ + t1=C[--Ctos]; /* pop off u */ +#ifdef DEBUG + printf("Ctos=%d poped %d\n",Ctos,1); +#endif + if ((t2=LBN_dup(t1)) == NULL) goto err; + BN_mask_bits(t2,lq); + T[Ttos++]=t2; +#ifdef DEBUG + printf("C4 r=0 bits=%d\n",BN_num_bits(t2)); +#endif + for (i=1; i<=lr; i++) + { + if (!BN_rshift(t1,t1,lq)) goto err; + if ((t2=LBN_dup(t1)) == NULL) goto err; + BN_mask_bits(t2,lq); + T[Ttos++]=t2; +#ifdef DEBUG + printf("C4 r=%d bits=%d\n",i, + BN_num_bits(t2)); +#endif + } + LBN_free(t1); + + if ((t2=LBN_new()) == NULL) goto err; + if ((t3=LBN_new()) == NULL) goto err; + for (j=0; j<=2*lr; j++) + { + if ((t1=LBN_new()) == NULL) goto err; + + if (!BN_set_word(t3,j)) goto err; + for (i=lr; i>=0; i--) + { + if (!BN_mul(t2,t1,t3)) goto err; + if (!BN_add(t1,t2,T[i])) goto err; + } + /* t1 is U(j) */ + if (z == 0) + U[Utos++]=t1; + else + V[Vtos++]=t1; + } + LBN_free(t2); + LBN_free(t3); + while (Ttos) LBN_free(T[--Ttos]); + } +#ifdef DEBUG + for (i=0; i<Utos; i++) + printf("U[%2d]=%4d bits\n",i,BN_num_bits(U[i])); + for (i=0; i<Vtos; i++) + printf("V[%2d]=%4d bits\n",i,BN_num_bits(V[i])); +#endif + /* C5 */ +#ifdef DEBUG + printf("PUSH CODE2 and %d CODE3 onto stack\n",2*lr); +#endif + C[Ctos++]=CODE2; + for (i=2*lr; i>0; i--) + { + C[Ctos++]=V[i]; + C[Ctos++]=U[i]; + C[Ctos++]=CODE3; + } + C[Ctos++]=V[0]; + C[Ctos++]=U[0]; +#ifdef DEBUG + printf("Ctos=%d pushed %d\n",Ctos,2*lr*3+3); +#endif + Vtos=Utos=0; + state=C3; + break; + case C6: + if ((t1=LBN_dup(w)) == NULL) goto err; + W[Wtos++]=t1; +#ifdef DEBUG + printf("put %d bit number onto w\n",BN_num_bits(t1)); +#endif + state=C3; + break; + case C7: + lr=r[k]; + lq=q[k]; + lp=q[k]+q[k-1]; + z=Wtos-2*lr-1; + for (j=1; j<=2*lr; j++) + { + for (i=2*lr; i>=j; i--) + { + if (!BN_sub(W[z+i],W[z+i],W[z+i-1])) goto err; + BN_div_word(W[z+i],j); + } + } + state=C8; + break; + case C8: + y=2*lr-1; + if ((t1=LBN_new()) == NULL) goto err; + if ((t3=LBN_new()) == NULL) goto err; + + for (j=y; j>0; j--) + { + if (!BN_set_word(t3,j)) goto err; + for (i=j; i<=y; i++) + { + if (!BN_mul(t1,W[z+i+1],t3)) goto err; + if (!BN_sub(W[z+i],W[z+i],t1)) goto err; + } + } + LBN_free(t1); + LBN_free(t3); + state=C9; + break; + case C9: + BN_zero(w); +#ifdef DEBUG + printf("lq=%d\n",lq); +#endif + for (i=lr*2; i>=0; i--) + { + BN_lshift(w,w,lq); + BN_add(w,w,W[z+i]); + } + for (i=0; i<=lr*2; i++) + LBN_free(W[--Wtos]); + state=C10; + break; + case C10: + k++; + t1=C[--Ctos]; +#ifdef DEBUG + printf("Ctos=%d poped %d\n",Ctos,1); + printf("code= CODE%d\n",t1); +#endif + if (t1 == CODE3) + state=C6; + else if (t1 == CODE2) + { + if ((t2=LBN_dup(w)) == NULL) goto err; + W[Wtos++]=t2; + state=C7; + } + else if (t1 == CODE1) + { + state=DONE; + } + else + { + printf("BAD ERROR\n"); + goto err; + } + break; + default: + printf("bad state\n"); + goto err; + break; + } + if (state == DONE) break; + } + ret=1; +err: + if (ret == 0) printf("ERROR\n"); + return(ret); + } + +#ifdef MAIN +main() + { + BIGNUM *a,*b,*r; + int i; + + if ((a=LBN_new()) == NULL) goto err; + if ((b=LBN_new()) == NULL) goto err; + if ((r=LBN_new()) == NULL) goto err; + + if (!BN_rand(a,1024*2,0,0)) goto err; + if (!BN_rand(b,1024*2,0,0)) goto err; + + for (i=0; i<10; i++) + { + if (!BN_mul_knuth(r,a,b)) goto err; /**/ + /*if (!BN_mul(r,a,b)) goto err; /**/ + } +BN_print(stdout,a); printf(" * "); +BN_print(stdout,b); printf(" =\n"); +BN_print(stdout,r); printf("\n"); + +printf("BN_new() =%d\nBN_free()=%d max=%d\n",new_total,Free_total,max); + + + exit(0); +err: + ERR_load_crypto_strings(); + ERR_print_errors(stderr); + exit(1); + } +#endif + +int BN_mask_bits(a,n) +BIGNUM *a; +int n; + { + int b,w; + + w=n/BN_BITS2; + b=n%BN_BITS2; + if (w >= a->top) return(0); + if (b == 0) + a->top=w; + else + { + a->top=w+1; + a->d[w]&= ~(BN_MASK2<<b); + } + return(1); + } + +BIGNUM *LBN_dup(a) +BIGNUM *a; + { + new_total++; + max_total++; + if (max_total > max) max=max_total; + return(BN_dup(a)); + } + +BIGNUM *LBN_new() + { + new_total++; + max_total++; + if (max_total > max) max=max_total; + return(BN_new()); + } + +void LBN_free(a) +BIGNUM *a; + { + max_total--; + if (max_total > max) max=max_total; + Free_total++; + BN_free(a); + } diff --git a/crypto/bn/stuff/div.c b/crypto/bn/stuff/div.c new file mode 100644 index 0000000000..3d6e08622d --- /dev/null +++ b/crypto/bn/stuff/div.c @@ -0,0 +1,340 @@ +/* crypto/bn/div.c */ + +#include <stdio.h> +#include "cryptlib.h" +#include "bn.h" + +BN_ULONG bn_div_2word(); + +int BN_div2(dv, rm, num, div,ctx) +BIGNUM *dv; +BIGNUM *rm; +BIGNUM *num; +BIGNUM *div; +BN_CTX *ctx; + { + int norm_shift,i,j,nm,nd,loop; + BIGNUM *tmp,wnum,*snum,*sdiv,*res; + BN_ULONG *resp,*wnump; + BN_ULONG d0,d1; + int num_n,div_n; + +#ifdef DEBUG +BN_print(stdout,num); printf(" number\n"); +BN_print(stdout,div); printf(" divisor\n"); +#endif + if (BN_is_zero(num)) + { + BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); + return(0); + } + + if (BN_cmp(num,div) < 0) + { + if (rm != NULL) + { if (BN_copy(rm,num) == NULL) return(0); } + if (dv != NULL) BN_zero(dv); + return(1); + } + + tmp=ctx->bn[ctx->tos]; + snum=ctx->bn[ctx->tos+1]; + sdiv=ctx->bn[ctx->tos+2]; + if (dv == NULL) + res=ctx->bn[ctx->tos+3]; + else res=dv; + + /* First we normalise the numbers */ + norm_shift=BN_BITS2-((BN_num_bits(div))%BN_BITS2); + BN_lshift(sdiv,div,norm_shift); + norm_shift+=BN_BITS2; + BN_lshift(snum,num,norm_shift); + div_n=sdiv->top; + num_n=snum->top; + loop=num_n-div_n; +#ifdef DEBUG +BN_print(stdout,snum); printf(" shifted num, forget last word\n"); +BN_print(stdout,sdiv); printf(" shifted div\n"); +#endif + + /* Lets setup a 'win'dow into snum + * This is the part that corresponds to the current + * 'area' being divided */ + wnum.d= &(snum->d[loop]); + wnum.top= div_n; + wnum.max= snum->max; /* a bit of a lie */ + wnum.neg= 0; + + /* Get the top 2 words of sdiv */ + i=sdiv->top; + d0=sdiv->d[div_n-1]; + d1=sdiv->d[div_n-2]; + + /* pointer to the 'top' of snum */ + wnump= &(snum->d[num_n-1]); + + /* Setup to 'res' */ + res->neg=0; + res->top=loop; + resp= &(res->d[loop-1]); + bn_expand(res,(loop+1)*BN_BITS2); + + /* space for temp */ + bn_expand(tmp,(div_n+1)*BN_BITS2); + +#ifdef DEBUG +printf("wnum="); BN_print(stdout,&wnum); printf(" initial sub check\n"); +printf("div ="); BN_print(stdout,sdiv); printf(" loop=%d\n",loop); +#endif + if (BN_cmp(&wnum,sdiv) >= 0) + { + BN_sub(&wnum,&wnum,sdiv); + *resp=1; + res->d[res->top-1]=1; + } + else + res->top--; + resp--; +#ifdef DEBUG +BN_print(stdout,res); printf(" initial result\n"); +BN_print(stdout,&wnum); printf(" wnum\n"); +#endif + + for (i=0; i<loop-1; i++) + { + BN_ULONG q,n0; + BN_ULLONG t1,t2,t3; + BN_ULONG l0; + + wnum.d--; + wnum.top++; + +#ifdef DEBUG +BN_print(stderr,&wnum); printf(" to divide\n"); +#endif + + q=0; + n0=wnump[0]; + t1=((BN_ULLONG)n0<<BN_BITS2)|wnump[-1]; + if (n0 == d0) + q=BN_MASK2; + else + { + t2=(t1/d0); + q=(t2&BN_MASK2); +#ifdef DEBUG +printf("t1=%08X / d0=%08X = %X (%X)\n",t1,d0,q,t2); +#endif + } + for (;;) + { + t2=(BN_ULLONG)d1*q; + t3=t1-(BN_ULLONG)q*d0; +#ifdef DEBUG +printf("d1*q= %X n01-q*d0 = %X\n",t2,t3); +#endif + if ((t3>>BN_BITS2) || + (t2 <= ((t3<<BN_BITS2)+wnump[-2]))) + break; +#ifdef DEBUG +printf("reduce q\n"); +#endif + q--; + } + l0=bn_mul_word(tmp->d,sdiv->d,div_n,q); + if (l0) + tmp->d[div_n]=l0; + else + tmp->d[div_n]=0; + for (j=div_n+1; j>0; j--) + if (tmp->d[j-1]) break; + tmp->top=j; + +#ifdef DEBUG +printf("q=%08X\n",q); +BN_print(stdout,&wnum); printf(" number\n"); +BN_print(stdout,tmp); printf(" subtract\n"); + +BN_print(stdout,snum); printf(" shifted number before\n"); +BN_print(stdout,&wnum); printf(" wnum before\n"); +#endif + j=wnum.top; + BN_sub(&wnum,&wnum,tmp); + snum->top=snum->top+wnum.top-j; + +#ifdef DEBUG +BN_print(stdout,&wnum); printf(" wnum after\n"); +BN_print(stdout,snum); printf(" shifted number after\n"); +#endif + + if (wnum.neg) + { + q--; + j=wnum.top; + BN_add(&wnum,&wnum,sdiv); + snum->top+=wnum.top-j; + fprintf(stderr,"addback\n"); +#ifdef DEBUG +BN_print(stdout,snum); printf("after addback************************:\n"); +#endif + } + *(resp--)=q; +#ifdef DEBUG +BN_print(stdout,res); printf(" result\n"); +#endif + wnump--; + } + if (rm != NULL) + BN_rshift(rm,snum,norm_shift); + return(1); + } + +main() + { + BIGNUM *a,*b,*c,*d; + BIGNUM *cc,*dd; + BN_CTX *ctx; + int i,x; + + a=BN_new(); + b=BN_new(); + c=BN_new(); + d=BN_new(); + cc=BN_new(); + dd=BN_new(); + ctx=BN_CTX_new(); + +for (i=0; i<10240; i++) + { + BN_rand(a,80,0,0); + BN_rand(b,60,0,0); + + BN_div2(d,c,a,b,ctx); + BN_div(dd,cc,a,b,ctx); + if ((BN_cmp(d,dd) != 0) || (BN_cmp(c,cc) != 0)) + { + BN_print(stderr,a); fprintf(stderr," / "); + BN_print(stderr,b); fprintf(stderr," d="); + BN_print(stderr,d); fprintf(stderr," r= "); + BN_print(stderr,c); fprintf(stderr,"\nd="); + BN_print(stderr,dd); fprintf(stderr," r= "); + BN_print(stderr,cc); fprintf(stderr,"\n"); + } + + } + +#ifdef undef +/* + BN_rand(a,600,0,0); + BN_rand(b,400,0,0); + for (i=0; i<2000000; i++) + { + BN_div2(d,c,a,b,ctx); + } +*/ +/* for (i=0;;) */ +/* for (i=0; i<0xffffffff; i++) + { + BN_ULONG rr,r,a,b,c; + BN_ULLONG l; + + a=rand()&BN_MASK2; + b=rand()&BN_MASK2; + for (;;) + { + c=rand()&BN_MASK2; + if (c) break; + } +/* for (x=1; x<256*256; x++) */ + { + c=x; + a=i>>8; + b=i&0xff; + a&= ~(0xFFFFFF<<(BN_num_bits_word(c))); + + r=bn_div_2word(a,b,c); + + rr=(BN_ULONG)((((BN_ULLONG)a<<BN_BITS2)|b)/c); + + if ((i & 0xfffff) == 0) fprintf(stderr,"%d\n",i,r,rr); +/*if (x == 255) + fprintf(stderr,"%6d/%3d = %4d %4d\n",(a<<8)|b,c,r,rr); */ + if (rr != r) + { + fprintf(stderr,"%8d %02X%02X / %02X = %02X %02X\n", + i,a,b,c,rr,r); + abort(); + } + } + } +#endif + } + +/* Divide h-l by d and return the result. */ +BN_ULONG bn_div_2word(l,h,d) +BN_ULONG l,h,d; + { + BN_ULONG dh,dl,q,ret=0,th,tl,t,top; + int i,count=2; + + if (d == 0) return(-1); + + i=BN_num_bits_word(d); + if ((i != BN_BITS2) && (h > 1<<i)) + { + fprintf(stderr,"Division would overflow\n"); + abort(); + } + i=BN_BITS2-i; + if (h >= d) h-=d; + + if (i) + { + d<<=i; + h=(h<<i)|(l>>(BN_BITS2-i)); + l<<=i; + } + dh=(d&BN_MASK2h)>>BN_BITS4; + dl=(d&BN_MASK2l); + for (;;) + { + if ((h>>BN_BITS4) == dh) + q=BN_MASK2l; + else + q=h/dh; + + for (;;) + { + t=(h-q*dh); + if ((t&BN_MASK2h) || + ((dl*q) <= ( + (t<<BN_BITS4)+ + ((l&BN_MASK2h)>>BN_BITS4)))) + break; + q--; + } + th=q*dh; + tl=q*dl; + t=(tl>>BN_BITS4); + tl=(tl<<BN_BITS4)&BN_MASK2h; + th+=t; + + if (l < tl) th++; + l-=tl; + if (h < th) + { + fprintf(stderr,"add back\n"); + h+=d; + q--; + } + h-=th; + + if (--count == 0) break; + + ret=q<<BN_BITS4; + h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; + l=(l&BN_MASK2l)<<BN_BITS4; + } + ret|=q; + return(ret); + } diff --git a/crypto/bn/stuff/mont.doc b/crypto/bn/stuff/mont.doc new file mode 100644 index 0000000000..55d1d79312 --- /dev/null +++ b/crypto/bn/stuff/mont.doc @@ -0,0 +1,17 @@ +All numbers (a) are stored aR mod N (except abRR) + +RR = REDC(R*R) /* RR mod N */ + + +convert a -> aR +convert b -> bR + + { + abRR = aR * bR + abR = REDC(abRR); /* mod N */ + } + +ab = REDC(abR); /* mod N */ + + +REDC strips off a multiplicaion by R mod N diff --git a/crypto/bn/stuff/wei_mulw.c b/crypto/bn/stuff/wei_mulw.c new file mode 100644 index 0000000000..7f8a1e58fe --- /dev/null +++ b/crypto/bn/stuff/wei_mulw.c @@ -0,0 +1,410 @@ +/* crypto/bn/wei_mulw.c */ + +#include <stdio.h> +#include "cryptlib.h" +#include "bn.h" +#include "bn_lcl.h" + +BN_ULONG bn_add_word(BN_ULONG *a,BN_ULONG c,int num); +BN_ULONG bn_add_words(BN_ULONG *ret,BN_ULONG *a,BN_ULONG *b,int num); +BN_ULONG bn_sub_words(BN_ULONG *ret,BN_ULONG *a,BN_ULONG *b,int num); + +void BN_mul_4words(BN_ULONG *ret,BN_ULONG a0,BN_ULONG a1, + BN_ULONG b0,BN_ULONG b1); + +void pr(a,n,s) +BN_ULONG *a; +int n; + { + while (n--) + fprintf(stdout,"%02X",a[n]); + fprintf(stdout,"%s",s); + } + + +BN_ULONG bn_add_word(a,w,num) +BN_ULONG *a; +BN_ULONG w; +int num; + { + BN_ULONG t; + +#ifdef DEBUG +{ BN_ULONG *aa=a; int i; for (i=num; i>0; i--) fprintf(stdout,"%02X",aa[i-1]); +fprintf(stdout," + %X - ",w); i=num; +#endif + +loop: + t= *a; + t=(t+w)&BN_MASK2; + *(a++)=t; + w=(t < w); + if (w && --num) goto loop; + +#ifdef DEBUG +for (; i>0; i--) fprintf(stdout,"%02X",aa[i-1]); +fprintf(stdout,"\n"); +} +#endif + + return(w); + } + +BN_ULONG bn_add_words(r,a,b,num) +BN_ULONG *r; +BN_ULONG *a; +BN_ULONG *b; +int num; + { +#if defined(BN_LLONG) + BN_ULLONG t; + BN_ULONG c=0; + int i; + + if (num&1) abort(); + + for (i=0; i<num; i+=2) + { + t=(BN_ULLONG)a[i]+b[i]+c; + r[i+0]=L(t); + t=(BN_ULLONG) H(t)+a[i+1]+b[i+1]; + r[i+1]=L(t); + c=H(t); + } + return(c); +#else + BN_ULONG c=0,t1,t2; + + for ( ; num; num--) + { + t1= *(a++); + t2= *(b++); + + if (c) + { + c=(t2 >= ((~t1)&BN_MASK2)); + (*r++)=(t1+t2+1)&BN_MASK2; + } + else + { + t2=(t1+t2)&BN_MASK2; + c=(t2 < t1); + (*r++)=t2; + } + } + return(c); +#endif + } + +BN_ULONG bn_sub_words(r,a,b,num) +BN_ULONG *r; +BN_ULONG *a; +BN_ULONG *b; +int num; + { +#if defined(BN_LLONG) + BN_ULLONG t; + BN_ULONG c=0; + int i; + + if (num&1) abort(); + + for (i=0; i<num; i+=2) + { + t=(BN_ULLONG)a[i]-b[i]-c; + r[i+0]=L(t); + t=(BN_ULLONG)a[i+1]-b[i+1]-(0-H(t))&BN_MASK2; + r[i+1]=L(t); + c=H(t); + } + return(c); +#else + BN_ULONG c=0,t1,t2; + + for ( ; num; num--) + { + t1= *(a++); + t2= *(b++); + + if (c) + { + c=(t1 <= t2); + t1=(t1-t2-1); + } + else + { + c=(t1 < t2); + t1=(t1-t2); + } + (*r++)=t1&BN_MASK2; + } + return(c); +#endif + } + + +/* ret[3,2,1,0] = a1,a0 * b1,b0 */ +void BN_mul_4words(ret,a0,a1,b0,b1) +BN_ULONG *ret; +BN_ULONG a0,a1,b0,b1; + { + BN_ULONG s,u; + BN_ULLONG fix,a0b0,a1b1,tmp; + + if (a1 >= a0) + { + s=(a1-a0); + u=(b0-b1); + fix=(BN_ULLONG)s*u; + if (b0 >= b1) s=0; + } + else + { + BN_ULONG u; + + if (b0 > b1) + { + s=(b0-b1); + u=(a1-a0); + fix=(BN_ULLONG)s*u; + } + else + { + u=(a0-a1); + s=(b1-b0); + fix=(BN_ULLONG)s*u; + s=0; + } + } + + a0b0=(BN_ULLONG)a0*b0; + ret[0]=L(a0b0); + + a1b1=(BN_ULLONG)a1*b1; + tmp=(BN_ULLONG) H(a0b0) + L(a0b0) + L(fix) + L(a1b1); + ret[1]=L(tmp); + + tmp=(BN_ULLONG) a1b1 + H(tmp) + H(a0b0) + H(fix) + H(a1b1) - s; + ret[2]=L(tmp); + ret[3]=H(tmp); + } + +/* ret[3,2,1,0] += a1,a0 * b1,b0 */ +BN_ULONG BN_mul_add_4words(ret,a0,a1,b0,b1) +BN_ULONG *ret; +BN_ULONG a0,a1,b0,b1; + { + BN_ULONG s,u; + BN_ULLONG fix,a0b0,a1b1,tmp; + +#ifdef DEBUG +fprintf(stdout,"%02X%02X%02X%02X",ret[3],ret[2],ret[1],ret[0]); +fprintf(stdout," + ( %02X%02X * %02X%02X ) - ",a1,a0,b1,b0); +#endif + if (a1 >= a0) + { + s=(a1-a0); + u=(b0-b1); + fix=(BN_ULLONG)s*u; + if (b0 >= b1) s=0; + } + else + { + if (b0 > b1) + { + s=(b0-b1); + u=(a1-a0); + fix=(BN_ULLONG)s*u; + } + else + { + u=(a0-a1); + s=(b1-b0); + fix=(BN_ULLONG)s*u; + s=0; + } + } + + a0b0=(BN_ULLONG)a0*b0; + tmp=a0b0+ret[0]; + ret[0]=L(tmp); + + a1b1=(BN_ULLONG)a1*b1; + tmp=(BN_ULLONG) H(tmp) + L(a0b0) + L(fix) + L(a1b1) + ret[1]; + ret[1]=L(tmp); + + tmp=(BN_ULLONG) H(tmp) + L(a1b1) + H(a0b0) + + H(fix) + H(a1b1) -s + ret[2]; + ret[2]=L(tmp); + + tmp=(BN_ULLONG) H(tmp) + H(a1b1) + ret[3]; + ret[3]=L(tmp); +#ifdef DEBUG +fprintf(stdout,"%02X%02X%02X%02X%02X\n",H(tmp),ret[3],ret[2],ret[1],ret[0]); +#endif + return(H(tmp)); + } + +/* ret[3,2,1,0] += a1,a0 * a1,a0 */ +void BN_sqr_4words(ret,a0,a1) +BN_ULONG *ret; +BN_ULONG a0,a1; + { + BN_ULONG s,u; + BN_ULLONG tmp,tmp2; + + tmp=(BN_ULLONG)a0*a0; + ret[0]=L(tmp); + + tmp2=(BN_ULLONG)a0*a1; + tmp=(BN_ULLONG)H(tmp)+L(tmp2)*2; + ret[1]=L(tmp); + + tmp=(BN_ULLONG)a1*a1+H(tmp)+H(tmp2)*2; + ret[2]=L(tmp); + ret[3]=L(tmp); + } + +#define N0 (0) +#define N1 (half) +#define N2 (num) +#define N3 (num+half) + +#define word_cmp(r,a,b,num) \ + { \ + int n=num; \ +\ + (r)=0; \ + while (n--) \ + { \ + if ((a)[(n)] > (b)[(n)]) \ + { (r)=1; break; } \ + else if ((a)[(n)] < (b)[(n)]) \ + { (r)= -1; break; } \ + } \ + } + + +/* (a->top == b->top) && (a->top >= 2) && !(a->top & 1) */ +void bn_recursize_mul(r,t,a,b,num) +BN_ULONG *r,*t,*a,*b; +int num; + { + if ((num < 2) || (num&1)) + abort(); + +/* fprintf(stderr,"num=%d half=%d\n",num,num/2);*/ + if (num == 2) + BN_mul_4words(r,a[0],a[1],b[0],b[1]); + else if (num == 4) + { + BN_ULONG c,tmp; + + BN_mul_4words(&(r[0]),a[0],a[1],b[0],b[1]); + BN_mul_4words(&(r[4]),a[2],a[3],b[2],b[3]); + + c =BN_mul_add_4words(&(r[2]),a[0],a[1],b[2],b[3]); + c+=BN_mul_add_4words(&(r[2]),a[2],a[3],b[0],b[1]); + + bn_add_word(&(r[6]),c,2); + } + else + { + int half=num/2; + int carry,cmp_a,cmp_b; + + word_cmp(cmp_a,&(a[0]),&(a[half]),half); + word_cmp(cmp_b,&(b[0]),&(b[half]),half); + + switch (cmp_a*2+cmp_a+cmp_b) + { + case -4: + bn_sub_words(&(t[N0]),&(a[N1]),&(a[N0]),half); + bn_sub_words(&(t[N1]),&(b[N0]),&(b[N1]),half); + bn_recursize_mul(&(r[N1]),&(t[N2]), + &(t[N0]),&(t[N1]),half); + bn_sub_words(&(r[N2]),&(r[N2]),&(t[N0]),half); + carry= -1; + break; + case -2: + bn_sub_words(&(t[N0]),&(a[N1]),&(a[N0]),half); + bn_sub_words(&(t[N1]),&(b[N0]),&(b[N1]),half); + bn_recursize_mul(&(r[N1]),&(t[N2]), + &(t[N0]),&(t[N1]),half); + carry=0; + break; + case 2: + bn_sub_words(&(t[N0]),&(a[N0]),&(a[N1]),half); + bn_sub_words(&(t[N1]),&(b[N1]),&(b[N0]),half); + bn_recursize_mul(&(r[N1]),&(t[N2]), + &(t[N0]),&(t[N1]),half); + carry=0; + break; + case 4: + bn_sub_words(&(t[N0]),&(a[N1]),&(a[N0]),half); + bn_sub_words(&(t[N1]),&(b[N0]),&(b[N1]),half); + bn_recursize_mul(&(r[N1]),&(t[N2]), + &(t[N0]),&(t[N1]),half); + bn_sub_words(&(r[N2]),&(r[N2]),&(t[N1]),half); + carry= -1; + break; + default: + memset(&(r[N1]),0,sizeof(BN_ULONG)*num); + break; + } + + bn_recursize_mul(&(t[N0]),&(t[N2]),&(a[N0]),&(b[N0]),half); +#ifdef DEBUG + pr(a,half," * "); + pr(b,half," - "); + pr(t,num," - 0\n"); +#endif + memcpy(&(r[N0]),&(t[N0]),half*sizeof(BN_ULONG)); + if (bn_add_words(&(r[N1]),&(r[N1]),&(t[N1]),half)) + { bn_add_word(&(t[N1]),1,half); } + + carry+=bn_add_words(&(r[N1]),&(r[N1]),&(t[N0]),num); + + bn_recursize_mul(&(t[N0]),&(t[N2]),&(a[N1]),&(b[N1]),half); + + carry+=bn_add_words(&(r[N1]),&(r[N1]),&(t[N0]),num); + carry+=bn_add_words(&(r[N2]),&(r[N2]),&(t[N0]),half); + memcpy(&(r[N3]),&(t[N1]),half*sizeof(BN_ULONG)); + + bn_add_word(&(r[N3]),carry,half); + } + } + +main() + { + BIGNUM *a,*b,*r,*t; + int i,j; + + a=BN_new(); + b=BN_new(); + r=BN_new(); + t=BN_new(); + +#define BITS 1024 + bn_expand(r,BITS*2); + bn_expand(t,BITS*2); + fprintf(stdout,"obase=16\n"); + fprintf(stdout,"ibase=16\n"); + for (i=0; i<10; i++) + { + BN_rand(a,BITS,0,0); + BN_rand(b,BITS,0,0); + r->top=(BITS*2)/BN_BITS2; + memset(r->d,0,sizeof(r->top)*sizeof(BN_ULONG)); + memset(t->d,0,sizeof(r->top)*sizeof(BN_ULONG)); + for (j=0; j<1000; j++) + { + +/* BN_mul(r,a,b); /**/ + bn_recursize_mul(r->d,t->d,a->d,b->d,a->top); /**/ + } + BN_print(stdout,a); fprintf(stdout," * "); + BN_print(stdout,b); fprintf(stdout," - "); + BN_print(stdout,r); fprintf(stdout,"\n"); + } + } |