/************************************************************************************ * * * Alex project routines for generalized and variable length ONB mathematics. * * copied from original source and modified with new math. Must be optimized for * * specific platforms later. Specific implementations should remove C constructs * * in favor of assembler for more speed. * * * * Author = mike rosing * * date = June 7, 1997 * ************************************************************************************/ #include "field2n.h" /* globals initialized once and used for multiply and inversion routines. */ static INDEX Lambda[2][field_prime]; static INDEX lg2_m; static INDEX log2[field_prime+1]; static INDEX two_inx[field_prime]; static ELEMENT two_bit[field_prime]; static unsigned char shift_by[256]; char parity[256]; void rot_left(a) FIELD2N *a; { INDEX i; ELEMENT bit,temp; bit = (a->e[0] & UPRBIT) ? 1L : 0L; for (i=NUMWORD; i>=0; i--) { temp = (a->e[i] & MSB) ? 1L : 0L; a->e[i] = ( a->e[i] << 1) | bit; bit = temp; } a->e[0] &= UPRMASK; } void rot_right(a) FIELD2N *a; { INDEX i; ELEMENT bit,temp; bit = (a->e[NUMWORD] & 1) ? UPRBIT : 0L; SUMLOOP(i) { temp = ( a->e[i] >> 1) | bit; bit = (a->e[i] & 1) ? MSB : 0L; a->e[i] = temp; } a->e[0] &= UPRMASK; } void null(a) FIELD2N *a; { INDEX i; SUMLOOP(i) a->e[i] = 0; } void copy (a,b) FIELD2N *a,*b; { INDEX i; SUMLOOP(i) b->e[i] = a->e[i]; } void null_cust(a) CUSTFIELD *a; { register INDEX i; for (i=0; i<=LONGWORD; i++) a->e[i] = 0; } void copy_cust (a,b) CUSTFIELD *a,*b; { register INDEX i; for (i=0; i<=LONGWORD; i++) b->e[i] = a->e[i]; } /* binary search for most significant bit within word */ INDEX log_2( x) ELEMENT x; { INDEX k, lg2; ELEMENT ebit, bitsave, bitmask; lg2 = 0; bitsave = x; /* grab bits we're interested in. */ k = WORDSIZE/2; /* first see if msb is in top half */ bitmask = -1L<> k); } return( lg2); } /* create Lambda [i,j] table. indexed by j, each entry contains the value of i which satisfies 2^i + 2^j = 1 || 0 mod field_prime. There are two 16 bit entries per index j except for zero. See references for details. Since 2^0 = 1 and 2^2n = 1, 2^n = -1 and the first entry would be 2^0 + 2^n = 0. Multiplying both sides by 2, it stays congruent to zero. So Half the table is unnecessary since multiplying exponents by 2 is the same as squaring is the same as rotation once. Lambda[0] stores n = (field_prime - 1)/2. The terms congruent to one must be found via lookup in the log table. Since every entry for (i,j) also generates an entry for (j,i), the whole 1D table can be built quickly. */ void genlambda() { INDEX i, logof, n, index, twoexp; for (i=0; ie[i] = copyb.e[i] & amatrix[zero_index].e[i]; /* main loop has two lookups for every position. */ for (j = 1; je[i] ^= copyb.e[i] & (amatrix[zero_index].e[i] ^ amatrix[one_index].e[i]); } } /* set b = a * u^n, where n>0 and n <= field_prime */ void cus_times_u_to_n(CUSTFIELD *a, int n, CUSTFIELD *b) { #define SIZE (2*LONGWORD+2) ELEMENT w, t[SIZE+1]; INDEX i, j, n1, n2, n3; if ( n == field_prime ) { copy_cust(a, b); return; } for ( j=0; j<=SIZE; j++ ) t[j] = 0; n1 = n / WORDSIZE; j = SIZE-n1; n2 = n & (WORDSIZE-1); if ( n2 ) { n3 = WORDSIZE-n2; for ( i=LONGWORD; i>=0; i-- ) { t[j--] |= a->e[i] << n2; t[j] |= a->e[i] >> n3; } } else { for ( i=LONGWORD; i>=0; i-- ) { t[j--] |= a->e[i]; } } n3 = LONGSHIFT+1; i = SIZE-LONGWORD; for ( j=SIZE; j>=SIZE-n1; j-- ) { t[j] |= t[i--] >> n3; t[j] |= t[i] << (WORDSIZE-n3); } w = t[SIZE-LONGWORD] & (1L << LONGSHIFT ) ? ~0 : 0; for ( i=0; i<=LONGWORD; i++ ) b->e[i] = t[i+SIZE-LONGWORD] ^ w; b->e[0] &= LONGMASK; #undef SIZE } /* This algorithm is the Almost Inverse Algorithm of Schroeppel, et al. given in "Fast Key Exchange with Elliptic Curve Systems */ void opt_inv(FIELD2N *a, FIELD2N *dest) { CUSTFIELD f, b, c, g; INDEX i, j, k, m, n, f_top, c_top; ELEMENT bits, t, mask; /* f, b, c, and g are not in optimal normal basis format: they are held in 'customary format', i.e. a0 + a1*u^1 + a2^u^2 + ...; For the comments in this routine, the polynomials are assumed to be polynomials in u. */ /* Set g to polynomial (u^p-1)/(u-1) */ for ( i=1; i<=LONGWORD; i++ ) g.e[i] = ~0; g.e[0] = LONGMASK | (1L << LONGSHIFT); /* Convert a to 'customary format', putting answer in f */ null_cust(&f); j = 0; for ( k=NUMWORD; k>=0; k-- ) { bits = a->e[k]; m = k>0 ? WORDSIZE : UPRSHIFT; for ( i=0; i>= 1; } } /* Set c to 0, b to 1, and n to 0 */ null_cust(&c); null_cust(&b); b.e[LONGWORD] = 1; n = 0; /* Now find a polynomial b, such that a*b = u^n */ /* f and g shrink, b and c grow. The code takes advantage of this. c_top and f_top are the variables which control this behavior */ c_top = LONGWORD; f_top = 0; do { i = shift_by[f.e[LONGWORD] & 0xff]; n+=i; /* Shift f right i (divide by u^i) */ m = 0; for ( j=f_top; j<=LONGWORD; j++ ) { bits = f.e[j]; f.e[j] = (bits>>i) | ((ELEMENT)m << (WORDSIZE-i)); m = bits; } } while ( i == 8 && (f.e[LONGWORD] & 1) == 0 ); for ( j=0; j= deg g, but we don't need to be so fine */ if ( f.e[f_top] < g.e[f_top] ) goto loop2; loop1: /* f = f+g, making f divisible by u */ for ( i=f_top; i<=LONGWORD; i++ ) f.e[i] ^= g.e[i]; /* b = b+c */ for ( i=c_top; i<=LONGWORD; i++ ) b.e[i] ^= c.e[i]; do { i = shift_by[f.e[LONGWORD] & 0xff]; n+=i; /* Shift c left i (multiply by u^i), lengthening it if needed */ m = 0; for ( j=LONGWORD; j>=c_top; j-- ) { bits = c.e[j]; c.e[j] = (bits<> (WORDSIZE-i); } if ( m ) c.e[c_top=j] = m; /* Shift f right i (divide by u^i) */ m = 0; for ( j=f_top; j<=LONGWORD; j++ ) { bits = f.e[j]; f.e[j] = (bits>>i) | ((ELEMENT)m << (WORDSIZE-i)); m = bits; } } while ( i == 8 && (f.e[LONGWORD] & 1) == 0 ); /* Check if we are done (f=1) */ for ( j=f_top; j0 ) goto done; do { /* Shorten f and g when possible */ while ( g.e[f_top] == 0 && f.e[f_top] == 0 ) f_top++; /* g needs to be bigger - if not, exchange f with g and b with c. (Actually jump to the other loop instead of doing the exchange) The published algorithm requires deg g >= deg f, but we don't need to be so fine */ if ( g.e[f_top] < f.e[f_top] ) goto loop1; loop2: /* g = f+g, making g divisible by u */ for ( i=f_top; i<=LONGWORD; i++ ) g.e[i] ^= f.e[i]; /* c = b+c */ for ( i=c_top; i<=LONGWORD; i++ ) c.e[i] ^= b.e[i]; do { i = shift_by[g.e[LONGWORD] & 0xff]; n+=i; /* Shift b left i (multiply by u^i), lengthening it if needed */ m = 0; for ( j=LONGWORD; j>=c_top; j-- ) { bits = b.e[j]; b.e[j] = (bits<> (WORDSIZE-i); } if ( m ) b.e[c_top=j] = m; /* Shift g right i (divide by u^i) */ m = 0; for ( j=f_top; j<=LONGWORD; j++ ) { bits = g.e[j]; g.e[j] = (bits>>i) | ((ELEMENT)m << (WORDSIZE-i)); m = bits; } } while ( i == 8 && (g.e[LONGWORD] & 1) == 0 ); /* Check if we are done (g=1) */ for ( j=f_top; j=0; k-- ) { bits = 0; t = 1; mask = k > 0 ? ~0 : UPRMASK; do { if ( b.e[two_inx[j]] & two_bit[j] ) bits ^= t; j++; t <<= 1; } while ( t&mask ); dest->e[k] ^= bits; } } /* nu_inv */ /* since I have more than one thing to do, create initialization routine. Thanks Dave! */ void init_opt_math() { #ifdef TYPE2 genlambda2(); #else genlambda(); #endif init_two(); }