aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn/stuff
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/stuff')
-rw-r--r--crypto/bn/stuff/bn_knuth.c378
-rw-r--r--crypto/bn/stuff/div.c340
-rw-r--r--crypto/bn/stuff/mont.doc17
-rw-r--r--crypto/bn/stuff/wei_mulw.c410
4 files changed, 1145 insertions, 0 deletions
diff --git a/crypto/bn/stuff/bn_knuth.c b/crypto/bn/stuff/bn_knuth.c
new file mode 100644
index 0000000000..9a3f4130ed
--- /dev/null
+++ b/crypto/bn/stuff/bn_knuth.c
@@ -0,0 +1,378 @@
+/* crypto/bn/bn_knuth.c */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn.h"
+
+/* This is just a test implementation, it has not been modified for
+ * speed and it still has memory leaks. */
+
+int BN_mask_bits(BIGNUM *a,int n);
+
+#undef DEBUG
+#define MAIN
+
+/* r must be different to a and b
+ * Toom-Cook multiplication algorithm, taken from
+ * The Art Of Computer Programming, Volume 2, Donald Knuth
+ */
+
+#define CODE1 ((BIGNUM *)0x01)
+#define CODE2 ((BIGNUM *)0x02)
+#define CODE3 ((BIGNUM *)0x03)
+#define MAXK (30+1)
+
+#define C3 3
+#define C4 4
+#define C5 5
+#define C6 6
+#define C7 7
+#define C8 8
+#define C9 9
+#define C10 10
+#define DONE 11
+
+int new_total=0;
+int Free_total=0;
+int max=0,max_total=0;
+
+BIGNUM *LBN_new(void );
+BIGNUM *LBN_dup(BIGNUM *a);
+void LBN_free(BIGNUM *a);
+
+int BN_mul_knuth(w, a, b)
+BIGNUM *w;
+BIGNUM *a;
+BIGNUM *b;
+ {
+ int ret=1;
+ int i,j,n,an,bn,y,z;
+ BIGNUM *U[MAXK],*V[MAXK],*T[MAXK];
+ BIGNUM *C[(MAXK*2*3)];
+ BIGNUM *W[(MAXK*2)],*t1,*t2,*t3,*t4;
+ int Utos,Vtos,Ctos,Wtos,Ttos;
+ unsigned int k,Q,R;
+ unsigned int q[MAXK];
+ unsigned int r[MAXK];
+ int state;
+
+ /* C1 */
+ Utos=Vtos=Ctos=Wtos=Ttos=0;
+ k=1;
+ q[0]=q[1]=64;
+ r[0]=r[1]=4;
+ Q=6;
+ R=2;
+
+ if (!bn_expand(w,BN_BITS2*2)) goto err;
+ an=BN_num_bits(a);
+ bn=BN_num_bits(b);
+ n=(an > bn)?an:bn;
+ while ((q[k-1]+q[k]) < n)
+ {
+ k++;
+ Q+=R;
+ i=R+1;
+ if ((i*i) <= Q) R=i;
+ q[k]=(1<<Q);
+ r[k]=(1<<R);
+ }
+#ifdef DEBUG
+ printf("k =");
+ for (i=0; i<=k; i++) printf("%7d",i);
+ printf("\nq[k]=");
+ for (i=0; i<=k; i++) printf("%7d",q[i]);
+ printf("\nr[k]=");
+ for (i=0; i<=k; i++) printf("%7d",r[i]);
+ printf("\n");
+#endif
+
+ /* C2 */
+ C[Ctos++]=CODE1;
+ if ((t1=LBN_dup(a)) == NULL) goto err;
+ C[Ctos++]=t1;
+ if ((t1=LBN_dup(b)) == NULL) goto err;
+ C[Ctos++]=t1;
+
+ state=C3;
+ for (;;)
+ {
+#ifdef DEBUG
+ printf("state=C%d, Ctos=%d Wtos=%d\n",state,Ctos,Wtos);
+#endif
+ switch (state)
+ {
+ int lr,lq,lp;
+ case C3:
+ k--;
+ if (k == 0)
+ {
+ t1=C[--Ctos];
+ t2=C[--Ctos];
+#ifdef DEBUG
+ printf("Ctos=%d poped %d\n",Ctos,2);
+#endif
+ if ((t2->top == 0) || (t1->top == 0))
+ w->top=0;
+ else
+ BN_mul(w,t1,t2);
+
+ LBN_free(t1); /* FREE */
+ LBN_free(t2); /* FREE */
+ state=C10;
+ }
+ else
+ {
+ lr=r[k];
+ lq=q[k];
+ lp=q[k-1]+q[k];
+ state=C4;
+ }
+ break;
+ case C4:
+ for (z=0; z<2; z++) /* do for u and v */
+ {
+ /* break the item at C[Ctos-1]
+ * into lr+1 parts of lq bits each
+ * for j=0; j<=2r; j++
+ */
+ t1=C[--Ctos]; /* pop off u */
+#ifdef DEBUG
+ printf("Ctos=%d poped %d\n",Ctos,1);
+#endif
+ if ((t2=LBN_dup(t1)) == NULL) goto err;
+ BN_mask_bits(t2,lq);
+ T[Ttos++]=t2;
+#ifdef DEBUG
+ printf("C4 r=0 bits=%d\n",BN_num_bits(t2));
+#endif
+ for (i=1; i<=lr; i++)
+ {
+ if (!BN_rshift(t1,t1,lq)) goto err;
+ if ((t2=LBN_dup(t1)) == NULL) goto err;
+ BN_mask_bits(t2,lq);
+ T[Ttos++]=t2;
+#ifdef DEBUG
+ printf("C4 r=%d bits=%d\n",i,
+ BN_num_bits(t2));
+#endif
+ }
+ LBN_free(t1);
+
+ if ((t2=LBN_new()) == NULL) goto err;
+ if ((t3=LBN_new()) == NULL) goto err;
+ for (j=0; j<=2*lr; j++)
+ {
+ if ((t1=LBN_new()) == NULL) goto err;
+
+ if (!BN_set_word(t3,j)) goto err;
+ for (i=lr; i>=0; i--)
+ {
+ if (!BN_mul(t2,t1,t3)) goto err;
+ if (!BN_add(t1,t2,T[i])) goto err;
+ }
+ /* t1 is U(j) */
+ if (z == 0)
+ U[Utos++]=t1;
+ else
+ V[Vtos++]=t1;
+ }
+ LBN_free(t2);
+ LBN_free(t3);
+ while (Ttos) LBN_free(T[--Ttos]);
+ }
+#ifdef DEBUG
+ for (i=0; i<Utos; i++)
+ printf("U[%2d]=%4d bits\n",i,BN_num_bits(U[i]));
+ for (i=0; i<Vtos; i++)
+ printf("V[%2d]=%4d bits\n",i,BN_num_bits(V[i]));
+#endif
+ /* C5 */
+#ifdef DEBUG
+ printf("PUSH CODE2 and %d CODE3 onto stack\n",2*lr);
+#endif
+ C[Ctos++]=CODE2;
+ for (i=2*lr; i>0; i--)
+ {
+ C[Ctos++]=V[i];
+ C[Ctos++]=U[i];
+ C[Ctos++]=CODE3;
+ }
+ C[Ctos++]=V[0];
+ C[Ctos++]=U[0];
+#ifdef DEBUG
+ printf("Ctos=%d pushed %d\n",Ctos,2*lr*3+3);
+#endif
+ Vtos=Utos=0;
+ state=C3;
+ break;
+ case C6:
+ if ((t1=LBN_dup(w)) == NULL) goto err;
+ W[Wtos++]=t1;
+#ifdef DEBUG
+ printf("put %d bit number onto w\n",BN_num_bits(t1));
+#endif
+ state=C3;
+ break;
+ case C7:
+ lr=r[k];
+ lq=q[k];
+ lp=q[k]+q[k-1];
+ z=Wtos-2*lr-1;
+ for (j=1; j<=2*lr; j++)
+ {
+ for (i=2*lr; i>=j; i--)
+ {
+ if (!BN_sub(W[z+i],W[z+i],W[z+i-1])) goto err;
+ BN_div_word(W[z+i],j);
+ }
+ }
+ state=C8;
+ break;
+ case C8:
+ y=2*lr-1;
+ if ((t1=LBN_new()) == NULL) goto err;
+ if ((t3=LBN_new()) == NULL) goto err;
+
+ for (j=y; j>0; j--)
+ {
+ if (!BN_set_word(t3,j)) goto err;
+ for (i=j; i<=y; i++)
+ {
+ if (!BN_mul(t1,W[z+i+1],t3)) goto err;
+ if (!BN_sub(W[z+i],W[z+i],t1)) goto err;
+ }
+ }
+ LBN_free(t1);
+ LBN_free(t3);
+ state=C9;
+ break;
+ case C9:
+ BN_zero(w);
+#ifdef DEBUG
+ printf("lq=%d\n",lq);
+#endif
+ for (i=lr*2; i>=0; i--)
+ {
+ BN_lshift(w,w,lq);
+ BN_add(w,w,W[z+i]);
+ }
+ for (i=0; i<=lr*2; i++)
+ LBN_free(W[--Wtos]);
+ state=C10;
+ break;
+ case C10:
+ k++;
+ t1=C[--Ctos];
+#ifdef DEBUG
+ printf("Ctos=%d poped %d\n",Ctos,1);
+ printf("code= CODE%d\n",t1);
+#endif
+ if (t1 == CODE3)
+ state=C6;
+ else if (t1 == CODE2)
+ {
+ if ((t2=LBN_dup(w)) == NULL) goto err;
+ W[Wtos++]=t2;
+ state=C7;
+ }
+ else if (t1 == CODE1)
+ {
+ state=DONE;
+ }
+ else
+ {
+ printf("BAD ERROR\n");
+ goto err;
+ }
+ break;
+ default:
+ printf("bad state\n");
+ goto err;
+ break;
+ }
+ if (state == DONE) break;
+ }
+ ret=1;
+err:
+ if (ret == 0) printf("ERROR\n");
+ return(ret);
+ }
+
+#ifdef MAIN
+main()
+ {
+ BIGNUM *a,*b,*r;
+ int i;
+
+ if ((a=LBN_new()) == NULL) goto err;
+ if ((b=LBN_new()) == NULL) goto err;
+ if ((r=LBN_new()) == NULL) goto err;
+
+ if (!BN_rand(a,1024*2,0,0)) goto err;
+ if (!BN_rand(b,1024*2,0,0)) goto err;
+
+ for (i=0; i<10; i++)
+ {
+ if (!BN_mul_knuth(r,a,b)) goto err; /**/
+ /*if (!BN_mul(r,a,b)) goto err; /**/
+ }
+BN_print(stdout,a); printf(" * ");
+BN_print(stdout,b); printf(" =\n");
+BN_print(stdout,r); printf("\n");
+
+printf("BN_new() =%d\nBN_free()=%d max=%d\n",new_total,Free_total,max);
+
+
+ exit(0);
+err:
+ ERR_load_crypto_strings();
+ ERR_print_errors(stderr);
+ exit(1);
+ }
+#endif
+
+int BN_mask_bits(a,n)
+BIGNUM *a;
+int n;
+ {
+ int b,w;
+
+ w=n/BN_BITS2;
+ b=n%BN_BITS2;
+ if (w >= a->top) return(0);
+ if (b == 0)
+ a->top=w;
+ else
+ {
+ a->top=w+1;
+ a->d[w]&= ~(BN_MASK2<<b);
+ }
+ return(1);
+ }
+
+BIGNUM *LBN_dup(a)
+BIGNUM *a;
+ {
+ new_total++;
+ max_total++;
+ if (max_total > max) max=max_total;
+ return(BN_dup(a));
+ }
+
+BIGNUM *LBN_new()
+ {
+ new_total++;
+ max_total++;
+ if (max_total > max) max=max_total;
+ return(BN_new());
+ }
+
+void LBN_free(a)
+BIGNUM *a;
+ {
+ max_total--;
+ if (max_total > max) max=max_total;
+ Free_total++;
+ BN_free(a);
+ }
diff --git a/crypto/bn/stuff/div.c b/crypto/bn/stuff/div.c
new file mode 100644
index 0000000000..3d6e08622d
--- /dev/null
+++ b/crypto/bn/stuff/div.c
@@ -0,0 +1,340 @@
+/* crypto/bn/div.c */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn.h"
+
+BN_ULONG bn_div_2word();
+
+int BN_div2(dv, rm, num, div,ctx)
+BIGNUM *dv;
+BIGNUM *rm;
+BIGNUM *num;
+BIGNUM *div;
+BN_CTX *ctx;
+ {
+ int norm_shift,i,j,nm,nd,loop;
+ BIGNUM *tmp,wnum,*snum,*sdiv,*res;
+ BN_ULONG *resp,*wnump;
+ BN_ULONG d0,d1;
+ int num_n,div_n;
+
+#ifdef DEBUG
+BN_print(stdout,num); printf(" number\n");
+BN_print(stdout,div); printf(" divisor\n");
+#endif
+ if (BN_is_zero(num))
+ {
+ BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
+ return(0);
+ }
+
+ if (BN_cmp(num,div) < 0)
+ {
+ if (rm != NULL)
+ { if (BN_copy(rm,num) == NULL) return(0); }
+ if (dv != NULL) BN_zero(dv);
+ return(1);
+ }
+
+ tmp=ctx->bn[ctx->tos];
+ snum=ctx->bn[ctx->tos+1];
+ sdiv=ctx->bn[ctx->tos+2];
+ if (dv == NULL)
+ res=ctx->bn[ctx->tos+3];
+ else res=dv;
+
+ /* First we normalise the numbers */
+ norm_shift=BN_BITS2-((BN_num_bits(div))%BN_BITS2);
+ BN_lshift(sdiv,div,norm_shift);
+ norm_shift+=BN_BITS2;
+ BN_lshift(snum,num,norm_shift);
+ div_n=sdiv->top;
+ num_n=snum->top;
+ loop=num_n-div_n;
+#ifdef DEBUG
+BN_print(stdout,snum); printf(" shifted num, forget last word\n");
+BN_print(stdout,sdiv); printf(" shifted div\n");
+#endif
+
+ /* Lets setup a 'win'dow into snum
+ * This is the part that corresponds to the current
+ * 'area' being divided */
+ wnum.d= &(snum->d[loop]);
+ wnum.top= div_n;
+ wnum.max= snum->max; /* a bit of a lie */
+ wnum.neg= 0;
+
+ /* Get the top 2 words of sdiv */
+ i=sdiv->top;
+ d0=sdiv->d[div_n-1];
+ d1=sdiv->d[div_n-2];
+
+ /* pointer to the 'top' of snum */
+ wnump= &(snum->d[num_n-1]);
+
+ /* Setup to 'res' */
+ res->neg=0;
+ res->top=loop;
+ resp= &(res->d[loop-1]);
+ bn_expand(res,(loop+1)*BN_BITS2);
+
+ /* space for temp */
+ bn_expand(tmp,(div_n+1)*BN_BITS2);
+
+#ifdef DEBUG
+printf("wnum="); BN_print(stdout,&wnum); printf(" initial sub check\n");
+printf("div ="); BN_print(stdout,sdiv); printf(" loop=%d\n",loop);
+#endif
+ if (BN_cmp(&wnum,sdiv) >= 0)
+ {
+ BN_sub(&wnum,&wnum,sdiv);
+ *resp=1;
+ res->d[res->top-1]=1;
+ }
+ else
+ res->top--;
+ resp--;
+#ifdef DEBUG
+BN_print(stdout,res); printf(" initial result\n");
+BN_print(stdout,&wnum); printf(" wnum\n");
+#endif
+
+ for (i=0; i<loop-1; i++)
+ {
+ BN_ULONG q,n0;
+ BN_ULLONG t1,t2,t3;
+ BN_ULONG l0;
+
+ wnum.d--;
+ wnum.top++;
+
+#ifdef DEBUG
+BN_print(stderr,&wnum); printf(" to divide\n");
+#endif
+
+ q=0;
+ n0=wnump[0];
+ t1=((BN_ULLONG)n0<<BN_BITS2)|wnump[-1];
+ if (n0 == d0)
+ q=BN_MASK2;
+ else
+ {
+ t2=(t1/d0);
+ q=(t2&BN_MASK2);
+#ifdef DEBUG
+printf("t1=%08X / d0=%08X = %X (%X)\n",t1,d0,q,t2);
+#endif
+ }
+ for (;;)
+ {
+ t2=(BN_ULLONG)d1*q;
+ t3=t1-(BN_ULLONG)q*d0;
+#ifdef DEBUG
+printf("d1*q= %X n01-q*d0 = %X\n",t2,t3);
+#endif
+ if ((t3>>BN_BITS2) ||
+ (t2 <= ((t3<<BN_BITS2)+wnump[-2])))
+ break;
+#ifdef DEBUG
+printf("reduce q\n");
+#endif
+ q--;
+ }
+ l0=bn_mul_word(tmp->d,sdiv->d,div_n,q);
+ if (l0)
+ tmp->d[div_n]=l0;
+ else
+ tmp->d[div_n]=0;
+ for (j=div_n+1; j>0; j--)
+ if (tmp->d[j-1]) break;
+ tmp->top=j;
+
+#ifdef DEBUG
+printf("q=%08X\n",q);
+BN_print(stdout,&wnum); printf(" number\n");
+BN_print(stdout,tmp); printf(" subtract\n");
+
+BN_print(stdout,snum); printf(" shifted number before\n");
+BN_print(stdout,&wnum); printf(" wnum before\n");
+#endif
+ j=wnum.top;
+ BN_sub(&wnum,&wnum,tmp);
+ snum->top=snum->top+wnum.top-j;
+
+#ifdef DEBUG
+BN_print(stdout,&wnum); printf(" wnum after\n");
+BN_print(stdout,snum); printf(" shifted number after\n");
+#endif
+
+ if (wnum.neg)
+ {
+ q--;
+ j=wnum.top;
+ BN_add(&wnum,&wnum,sdiv);
+ snum->top+=wnum.top-j;
+ fprintf(stderr,"addback\n");
+#ifdef DEBUG
+BN_print(stdout,snum); printf("after addback************************:\n");
+#endif
+ }
+ *(resp--)=q;
+#ifdef DEBUG
+BN_print(stdout,res); printf(" result\n");
+#endif
+ wnump--;
+ }
+ if (rm != NULL)
+ BN_rshift(rm,snum,norm_shift);
+ return(1);
+ }
+
+main()
+ {
+ BIGNUM *a,*b,*c,*d;
+ BIGNUM *cc,*dd;
+ BN_CTX *ctx;
+ int i,x;
+
+ a=BN_new();
+ b=BN_new();
+ c=BN_new();
+ d=BN_new();
+ cc=BN_new();
+ dd=BN_new();
+ ctx=BN_CTX_new();
+
+for (i=0; i<10240; i++)
+ {
+ BN_rand(a,80,0,0);
+ BN_rand(b,60,0,0);
+
+ BN_div2(d,c,a,b,ctx);
+ BN_div(dd,cc,a,b,ctx);
+ if ((BN_cmp(d,dd) != 0) || (BN_cmp(c,cc) != 0))
+ {
+ BN_print(stderr,a); fprintf(stderr," / ");
+ BN_print(stderr,b); fprintf(stderr," d=");
+ BN_print(stderr,d); fprintf(stderr," r= ");
+ BN_print(stderr,c); fprintf(stderr,"\nd=");
+ BN_print(stderr,dd); fprintf(stderr," r= ");
+ BN_print(stderr,cc); fprintf(stderr,"\n");
+ }
+
+ }
+
+#ifdef undef
+/*
+ BN_rand(a,600,0,0);
+ BN_rand(b,400,0,0);
+ for (i=0; i<2000000; i++)
+ {
+ BN_div2(d,c,a,b,ctx);
+ }
+*/
+/* for (i=0;;) */
+/* for (i=0; i<0xffffffff; i++)
+ {
+ BN_ULONG rr,r,a,b,c;
+ BN_ULLONG l;
+
+ a=rand()&BN_MASK2;
+ b=rand()&BN_MASK2;
+ for (;;)
+ {
+ c=rand()&BN_MASK2;
+ if (c) break;
+ }
+/* for (x=1; x<256*256; x++) */
+ {
+ c=x;
+ a=i>>8;
+ b=i&0xff;
+ a&= ~(0xFFFFFF<<(BN_num_bits_word(c)));
+
+ r=bn_div_2word(a,b,c);
+
+ rr=(BN_ULONG)((((BN_ULLONG)a<<BN_BITS2)|b)/c);
+
+ if ((i & 0xfffff) == 0) fprintf(stderr,"%d\n",i,r,rr);
+/*if (x == 255)
+ fprintf(stderr,"%6d/%3d = %4d %4d\n",(a<<8)|b,c,r,rr); */
+ if (rr != r)
+ {
+ fprintf(stderr,"%8d %02X%02X / %02X = %02X %02X\n",
+ i,a,b,c,rr,r);
+ abort();
+ }
+ }
+ }
+#endif
+ }
+
+/* Divide h-l by d and return the result. */
+BN_ULONG bn_div_2word(l,h,d)
+BN_ULONG l,h,d;
+ {
+ BN_ULONG dh,dl,q,ret=0,th,tl,t,top;
+ int i,count=2;
+
+ if (d == 0) return(-1);
+
+ i=BN_num_bits_word(d);
+ if ((i != BN_BITS2) && (h > 1<<i))
+ {
+ fprintf(stderr,"Division would overflow\n");
+ abort();
+ }
+ i=BN_BITS2-i;
+ if (h >= d) h-=d;
+
+ if (i)
+ {
+ d<<=i;
+ h=(h<<i)|(l>>(BN_BITS2-i));
+ l<<=i;
+ }
+ dh=(d&BN_MASK2h)>>BN_BITS4;
+ dl=(d&BN_MASK2l);
+ for (;;)
+ {
+ if ((h>>BN_BITS4) == dh)
+ q=BN_MASK2l;
+ else
+ q=h/dh;
+
+ for (;;)
+ {
+ t=(h-q*dh);
+ if ((t&BN_MASK2h) ||
+ ((dl*q) <= (
+ (t<<BN_BITS4)+
+ ((l&BN_MASK2h)>>BN_BITS4))))
+ break;
+ q--;
+ }
+ th=q*dh;
+ tl=q*dl;
+ t=(tl>>BN_BITS4);
+ tl=(tl<<BN_BITS4)&BN_MASK2h;
+ th+=t;
+
+ if (l < tl) th++;
+ l-=tl;
+ if (h < th)
+ {
+ fprintf(stderr,"add back\n");
+ h+=d;
+ q--;
+ }
+ h-=th;
+
+ if (--count == 0) break;
+
+ ret=q<<BN_BITS4;
+ h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
+ l=(l&BN_MASK2l)<<BN_BITS4;
+ }
+ ret|=q;
+ return(ret);
+ }
diff --git a/crypto/bn/stuff/mont.doc b/crypto/bn/stuff/mont.doc
new file mode 100644
index 0000000000..55d1d79312
--- /dev/null
+++ b/crypto/bn/stuff/mont.doc
@@ -0,0 +1,17 @@
+All numbers (a) are stored aR mod N (except abRR)
+
+RR = REDC(R*R) /* RR mod N */
+
+
+convert a -> aR
+convert b -> bR
+
+ {
+ abRR = aR * bR
+ abR = REDC(abRR); /* mod N */
+ }
+
+ab = REDC(abR); /* mod N */
+
+
+REDC strips off a multiplicaion by R mod N
diff --git a/crypto/bn/stuff/wei_mulw.c b/crypto/bn/stuff/wei_mulw.c
new file mode 100644
index 0000000000..7f8a1e58fe
--- /dev/null
+++ b/crypto/bn/stuff/wei_mulw.c
@@ -0,0 +1,410 @@
+/* crypto/bn/wei_mulw.c */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn.h"
+#include "bn_lcl.h"
+
+BN_ULONG bn_add_word(BN_ULONG *a,BN_ULONG c,int num);
+BN_ULONG bn_add_words(BN_ULONG *ret,BN_ULONG *a,BN_ULONG *b,int num);
+BN_ULONG bn_sub_words(BN_ULONG *ret,BN_ULONG *a,BN_ULONG *b,int num);
+
+void BN_mul_4words(BN_ULONG *ret,BN_ULONG a0,BN_ULONG a1,
+ BN_ULONG b0,BN_ULONG b1);
+
+void pr(a,n,s)
+BN_ULONG *a;
+int n;
+ {
+ while (n--)
+ fprintf(stdout,"%02X",a[n]);
+ fprintf(stdout,"%s",s);
+ }
+
+
+BN_ULONG bn_add_word(a,w,num)
+BN_ULONG *a;
+BN_ULONG w;
+int num;
+ {
+ BN_ULONG t;
+
+#ifdef DEBUG
+{ BN_ULONG *aa=a; int i; for (i=num; i>0; i--) fprintf(stdout,"%02X",aa[i-1]);
+fprintf(stdout," + %X - ",w); i=num;
+#endif
+
+loop:
+ t= *a;
+ t=(t+w)&BN_MASK2;
+ *(a++)=t;
+ w=(t < w);
+ if (w && --num) goto loop;
+
+#ifdef DEBUG
+for (; i>0; i--) fprintf(stdout,"%02X",aa[i-1]);
+fprintf(stdout,"\n");
+}
+#endif
+
+ return(w);
+ }
+
+BN_ULONG bn_add_words(r,a,b,num)
+BN_ULONG *r;
+BN_ULONG *a;
+BN_ULONG *b;
+int num;
+ {
+#if defined(BN_LLONG)
+ BN_ULLONG t;
+ BN_ULONG c=0;
+ int i;
+
+ if (num&1) abort();
+
+ for (i=0; i<num; i+=2)
+ {
+ t=(BN_ULLONG)a[i]+b[i]+c;
+ r[i+0]=L(t);
+ t=(BN_ULLONG) H(t)+a[i+1]+b[i+1];
+ r[i+1]=L(t);
+ c=H(t);
+ }
+ return(c);
+#else
+ BN_ULONG c=0,t1,t2;
+
+ for ( ; num; num--)
+ {
+ t1= *(a++);
+ t2= *(b++);
+
+ if (c)
+ {
+ c=(t2 >= ((~t1)&BN_MASK2));
+ (*r++)=(t1+t2+1)&BN_MASK2;
+ }
+ else
+ {
+ t2=(t1+t2)&BN_MASK2;
+ c=(t2 < t1);
+ (*r++)=t2;
+ }
+ }
+ return(c);
+#endif
+ }
+
+BN_ULONG bn_sub_words(r,a,b,num)
+BN_ULONG *r;
+BN_ULONG *a;
+BN_ULONG *b;
+int num;
+ {
+#if defined(BN_LLONG)
+ BN_ULLONG t;
+ BN_ULONG c=0;
+ int i;
+
+ if (num&1) abort();
+
+ for (i=0; i<num; i+=2)
+ {
+ t=(BN_ULLONG)a[i]-b[i]-c;
+ r[i+0]=L(t);
+ t=(BN_ULLONG)a[i+1]-b[i+1]-(0-H(t))&BN_MASK2;
+ r[i+1]=L(t);
+ c=H(t);
+ }
+ return(c);
+#else
+ BN_ULONG c=0,t1,t2;
+
+ for ( ; num; num--)
+ {
+ t1= *(a++);
+ t2= *(b++);
+
+ if (c)
+ {
+ c=(t1 <= t2);
+ t1=(t1-t2-1);
+ }
+ else
+ {
+ c=(t1 < t2);
+ t1=(t1-t2);
+ }
+ (*r++)=t1&BN_MASK2;
+ }
+ return(c);
+#endif
+ }
+
+
+/* ret[3,2,1,0] = a1,a0 * b1,b0 */
+void BN_mul_4words(ret,a0,a1,b0,b1)
+BN_ULONG *ret;
+BN_ULONG a0,a1,b0,b1;
+ {
+ BN_ULONG s,u;
+ BN_ULLONG fix,a0b0,a1b1,tmp;
+
+ if (a1 >= a0)
+ {
+ s=(a1-a0);
+ u=(b0-b1);
+ fix=(BN_ULLONG)s*u;
+ if (b0 >= b1) s=0;
+ }
+ else
+ {
+ BN_ULONG u;
+
+ if (b0 > b1)
+ {
+ s=(b0-b1);
+ u=(a1-a0);
+ fix=(BN_ULLONG)s*u;
+ }
+ else
+ {
+ u=(a0-a1);
+ s=(b1-b0);
+ fix=(BN_ULLONG)s*u;
+ s=0;
+ }
+ }
+
+ a0b0=(BN_ULLONG)a0*b0;
+ ret[0]=L(a0b0);
+
+ a1b1=(BN_ULLONG)a1*b1;
+ tmp=(BN_ULLONG) H(a0b0) + L(a0b0) + L(fix) + L(a1b1);
+ ret[1]=L(tmp);
+
+ tmp=(BN_ULLONG) a1b1 + H(tmp) + H(a0b0) + H(fix) + H(a1b1) - s;
+ ret[2]=L(tmp);
+ ret[3]=H(tmp);
+ }
+
+/* ret[3,2,1,0] += a1,a0 * b1,b0 */
+BN_ULONG BN_mul_add_4words(ret,a0,a1,b0,b1)
+BN_ULONG *ret;
+BN_ULONG a0,a1,b0,b1;
+ {
+ BN_ULONG s,u;
+ BN_ULLONG fix,a0b0,a1b1,tmp;
+
+#ifdef DEBUG
+fprintf(stdout,"%02X%02X%02X%02X",ret[3],ret[2],ret[1],ret[0]);
+fprintf(stdout," + ( %02X%02X * %02X%02X ) - ",a1,a0,b1,b0);
+#endif
+ if (a1 >= a0)
+ {
+ s=(a1-a0);
+ u=(b0-b1);
+ fix=(BN_ULLONG)s*u;
+ if (b0 >= b1) s=0;
+ }
+ else
+ {
+ if (b0 > b1)
+ {
+ s=(b0-b1);
+ u=(a1-a0);
+ fix=(BN_ULLONG)s*u;
+ }
+ else
+ {
+ u=(a0-a1);
+ s=(b1-b0);
+ fix=(BN_ULLONG)s*u;
+ s=0;
+ }
+ }
+
+ a0b0=(BN_ULLONG)a0*b0;
+ tmp=a0b0+ret[0];
+ ret[0]=L(tmp);
+
+ a1b1=(BN_ULLONG)a1*b1;
+ tmp=(BN_ULLONG) H(tmp) + L(a0b0) + L(fix) + L(a1b1) + ret[1];
+ ret[1]=L(tmp);
+
+ tmp=(BN_ULLONG) H(tmp) + L(a1b1) + H(a0b0) +
+ H(fix) + H(a1b1) -s + ret[2];
+ ret[2]=L(tmp);
+
+ tmp=(BN_ULLONG) H(tmp) + H(a1b1) + ret[3];
+ ret[3]=L(tmp);
+#ifdef DEBUG
+fprintf(stdout,"%02X%02X%02X%02X%02X\n",H(tmp),ret[3],ret[2],ret[1],ret[0]);
+#endif
+ return(H(tmp));
+ }
+
+/* ret[3,2,1,0] += a1,a0 * a1,a0 */
+void BN_sqr_4words(ret,a0,a1)
+BN_ULONG *ret;
+BN_ULONG a0,a1;
+ {
+ BN_ULONG s,u;
+ BN_ULLONG tmp,tmp2;
+
+ tmp=(BN_ULLONG)a0*a0;
+ ret[0]=L(tmp);
+
+ tmp2=(BN_ULLONG)a0*a1;
+ tmp=(BN_ULLONG)H(tmp)+L(tmp2)*2;
+ ret[1]=L(tmp);
+
+ tmp=(BN_ULLONG)a1*a1+H(tmp)+H(tmp2)*2;
+ ret[2]=L(tmp);
+ ret[3]=L(tmp);
+ }
+
+#define N0 (0)
+#define N1 (half)
+#define N2 (num)
+#define N3 (num+half)
+
+#define word_cmp(r,a,b,num) \
+ { \
+ int n=num; \
+\
+ (r)=0; \
+ while (n--) \
+ { \
+ if ((a)[(n)] > (b)[(n)]) \
+ { (r)=1; break; } \
+ else if ((a)[(n)] < (b)[(n)]) \
+ { (r)= -1; break; } \
+ } \
+ }
+
+
+/* (a->top == b->top) && (a->top >= 2) && !(a->top & 1) */
+void bn_recursize_mul(r,t,a,b,num)
+BN_ULONG *r,*t,*a,*b;
+int num;
+ {
+ if ((num < 2) || (num&1))
+ abort();
+
+/* fprintf(stderr,"num=%d half=%d\n",num,num/2);*/
+ if (num == 2)
+ BN_mul_4words(r,a[0],a[1],b[0],b[1]);
+ else if (num == 4)
+ {
+ BN_ULONG c,tmp;
+
+ BN_mul_4words(&(r[0]),a[0],a[1],b[0],b[1]);
+ BN_mul_4words(&(r[4]),a[2],a[3],b[2],b[3]);
+
+ c =BN_mul_add_4words(&(r[2]),a[0],a[1],b[2],b[3]);
+ c+=BN_mul_add_4words(&(r[2]),a[2],a[3],b[0],b[1]);
+
+ bn_add_word(&(r[6]),c,2);
+ }
+ else
+ {
+ int half=num/2;
+ int carry,cmp_a,cmp_b;
+
+ word_cmp(cmp_a,&(a[0]),&(a[half]),half);
+ word_cmp(cmp_b,&(b[0]),&(b[half]),half);
+
+ switch (cmp_a*2+cmp_a+cmp_b)
+ {
+ case -4:
+ bn_sub_words(&(t[N0]),&(a[N1]),&(a[N0]),half);
+ bn_sub_words(&(t[N1]),&(b[N0]),&(b[N1]),half);
+ bn_recursize_mul(&(r[N1]),&(t[N2]),
+ &(t[N0]),&(t[N1]),half);
+ bn_sub_words(&(r[N2]),&(r[N2]),&(t[N0]),half);
+ carry= -1;
+ break;
+ case -2:
+ bn_sub_words(&(t[N0]),&(a[N1]),&(a[N0]),half);
+ bn_sub_words(&(t[N1]),&(b[N0]),&(b[N1]),half);
+ bn_recursize_mul(&(r[N1]),&(t[N2]),
+ &(t[N0]),&(t[N1]),half);
+ carry=0;
+ break;
+ case 2:
+ bn_sub_words(&(t[N0]),&(a[N0]),&(a[N1]),half);
+ bn_sub_words(&(t[N1]),&(b[N1]),&(b[N0]),half);
+ bn_recursize_mul(&(r[N1]),&(t[N2]),
+ &(t[N0]),&(t[N1]),half);
+ carry=0;
+ break;
+ case 4:
+ bn_sub_words(&(t[N0]),&(a[N1]),&(a[N0]),half);
+ bn_sub_words(&(t[N1]),&(b[N0]),&(b[N1]),half);
+ bn_recursize_mul(&(r[N1]),&(t[N2]),
+ &(t[N0]),&(t[N1]),half);
+ bn_sub_words(&(r[N2]),&(r[N2]),&(t[N1]),half);
+ carry= -1;
+ break;
+ default:
+ memset(&(r[N1]),0,sizeof(BN_ULONG)*num);
+ break;
+ }
+
+ bn_recursize_mul(&(t[N0]),&(t[N2]),&(a[N0]),&(b[N0]),half);
+#ifdef DEBUG
+ pr(a,half," * ");
+ pr(b,half," - ");
+ pr(t,num," - 0\n");
+#endif
+ memcpy(&(r[N0]),&(t[N0]),half*sizeof(BN_ULONG));
+ if (bn_add_words(&(r[N1]),&(r[N1]),&(t[N1]),half))
+ { bn_add_word(&(t[N1]),1,half); }
+
+ carry+=bn_add_words(&(r[N1]),&(r[N1]),&(t[N0]),num);
+
+ bn_recursize_mul(&(t[N0]),&(t[N2]),&(a[N1]),&(b[N1]),half);
+
+ carry+=bn_add_words(&(r[N1]),&(r[N1]),&(t[N0]),num);
+ carry+=bn_add_words(&(r[N2]),&(r[N2]),&(t[N0]),half);
+ memcpy(&(r[N3]),&(t[N1]),half*sizeof(BN_ULONG));
+
+ bn_add_word(&(r[N3]),carry,half);
+ }
+ }
+
+main()
+ {
+ BIGNUM *a,*b,*r,*t;
+ int i,j;
+
+ a=BN_new();
+ b=BN_new();
+ r=BN_new();
+ t=BN_new();
+
+#define BITS 1024
+ bn_expand(r,BITS*2);
+ bn_expand(t,BITS*2);
+ fprintf(stdout,"obase=16\n");
+ fprintf(stdout,"ibase=16\n");
+ for (i=0; i<10; i++)
+ {
+ BN_rand(a,BITS,0,0);
+ BN_rand(b,BITS,0,0);
+ r->top=(BITS*2)/BN_BITS2;
+ memset(r->d,0,sizeof(r->top)*sizeof(BN_ULONG));
+ memset(t->d,0,sizeof(r->top)*sizeof(BN_ULONG));
+ for (j=0; j<1000; j++)
+ {
+
+/* BN_mul(r,a,b); /**/
+ bn_recursize_mul(r->d,t->d,a->d,b->d,a->top); /**/
+ }
+ BN_print(stdout,a); fprintf(stdout," * ");
+ BN_print(stdout,b); fprintf(stdout," - ");
+ BN_print(stdout,r); fprintf(stdout,"\n");
+ }
+ }