jdk/src/share/native/sun/security/ec/mp_gf2m.c
changeset 3492 e549cea58864
equal deleted inserted replaced
3480:c197e38bf15a 3492:e549cea58864
       
     1 /* *********************************************************************
       
     2  *
       
     3  * Sun elects to have this file available under and governed by the
       
     4  * Mozilla Public License Version 1.1 ("MPL") (see
       
     5  * http://www.mozilla.org/MPL/ for full license text). For the avoidance
       
     6  * of doubt and subject to the following, Sun also elects to allow
       
     7  * licensees to use this file under the MPL, the GNU General Public
       
     8  * License version 2 only or the Lesser General Public License version
       
     9  * 2.1 only. Any references to the "GNU General Public License version 2
       
    10  * or later" or "GPL" in the following shall be construed to mean the
       
    11  * GNU General Public License version 2 only. Any references to the "GNU
       
    12  * Lesser General Public License version 2.1 or later" or "LGPL" in the
       
    13  * following shall be construed to mean the GNU Lesser General Public
       
    14  * License version 2.1 only. However, the following notice accompanied
       
    15  * the original version of this file:
       
    16  *
       
    17  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       
    18  *
       
    19  * The contents of this file are subject to the Mozilla Public License Version
       
    20  * 1.1 (the "License"); you may not use this file except in compliance with
       
    21  * the License. You may obtain a copy of the License at
       
    22  * http://www.mozilla.org/MPL/
       
    23  *
       
    24  * Software distributed under the License is distributed on an "AS IS" basis,
       
    25  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
       
    26  * for the specific language governing rights and limitations under the
       
    27  * License.
       
    28  *
       
    29  * The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
       
    30  *
       
    31  * The Initial Developer of the Original Code is
       
    32  * Sun Microsystems, Inc.
       
    33  * Portions created by the Initial Developer are Copyright (C) 2003
       
    34  * the Initial Developer. All Rights Reserved.
       
    35  *
       
    36  * Contributor(s):
       
    37  *   Sheueling Chang Shantz <sheueling.chang@sun.com> and
       
    38  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
       
    39  *
       
    40  * Alternatively, the contents of this file may be used under the terms of
       
    41  * either the GNU General Public License Version 2 or later (the "GPL"), or
       
    42  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
       
    43  * in which case the provisions of the GPL or the LGPL are applicable instead
       
    44  * of those above. If you wish to allow use of your version of this file only
       
    45  * under the terms of either the GPL or the LGPL, and not to allow others to
       
    46  * use your version of this file under the terms of the MPL, indicate your
       
    47  * decision by deleting the provisions above and replace them with the notice
       
    48  * and other provisions required by the GPL or the LGPL. If you do not delete
       
    49  * the provisions above, a recipient may use your version of this file under
       
    50  * the terms of any one of the MPL, the GPL or the LGPL.
       
    51  *
       
    52  *********************************************************************** */
       
    53 /*
       
    54  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
       
    55  * Use is subject to license terms.
       
    56  */
       
    57 
       
    58 #pragma ident   "%Z%%M% %I%     %E% SMI"
       
    59 
       
    60 #include "mp_gf2m.h"
       
    61 #include "mp_gf2m-priv.h"
       
    62 #include "mplogic.h"
       
    63 #include "mpi-priv.h"
       
    64 
       
    65 const mp_digit mp_gf2m_sqr_tb[16] =
       
    66 {
       
    67       0,     1,     4,     5,    16,    17,    20,    21,
       
    68      64,    65,    68,    69,    80,    81,    84,    85
       
    69 };
       
    70 
       
    71 /* Multiply two binary polynomials mp_digits a, b.
       
    72  * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
       
    73  * Output in two mp_digits rh, rl.
       
    74  */
       
    75 #if MP_DIGIT_BITS == 32
       
    76 void
       
    77 s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
       
    78 {
       
    79     register mp_digit h, l, s;
       
    80     mp_digit tab[8], top2b = a >> 30;
       
    81     register mp_digit a1, a2, a4;
       
    82 
       
    83     a1 = a & (0x3FFFFFFF); a2 = a1 << 1; a4 = a2 << 1;
       
    84 
       
    85     tab[0] =  0; tab[1] = a1;    tab[2] = a2;    tab[3] = a1^a2;
       
    86     tab[4] = a4; tab[5] = a1^a4; tab[6] = a2^a4; tab[7] = a1^a2^a4;
       
    87 
       
    88     s = tab[b       & 0x7]; l  = s;
       
    89     s = tab[b >>  3 & 0x7]; l ^= s <<  3; h  = s >> 29;
       
    90     s = tab[b >>  6 & 0x7]; l ^= s <<  6; h ^= s >> 26;
       
    91     s = tab[b >>  9 & 0x7]; l ^= s <<  9; h ^= s >> 23;
       
    92     s = tab[b >> 12 & 0x7]; l ^= s << 12; h ^= s >> 20;
       
    93     s = tab[b >> 15 & 0x7]; l ^= s << 15; h ^= s >> 17;
       
    94     s = tab[b >> 18 & 0x7]; l ^= s << 18; h ^= s >> 14;
       
    95     s = tab[b >> 21 & 0x7]; l ^= s << 21; h ^= s >> 11;
       
    96     s = tab[b >> 24 & 0x7]; l ^= s << 24; h ^= s >>  8;
       
    97     s = tab[b >> 27 & 0x7]; l ^= s << 27; h ^= s >>  5;
       
    98     s = tab[b >> 30      ]; l ^= s << 30; h ^= s >>  2;
       
    99 
       
   100     /* compensate for the top two bits of a */
       
   101 
       
   102     if (top2b & 01) { l ^= b << 30; h ^= b >> 2; }
       
   103     if (top2b & 02) { l ^= b << 31; h ^= b >> 1; }
       
   104 
       
   105     *rh = h; *rl = l;
       
   106 }
       
   107 #else
       
   108 void
       
   109 s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
       
   110 {
       
   111     register mp_digit h, l, s;
       
   112     mp_digit tab[16], top3b = a >> 61;
       
   113     register mp_digit a1, a2, a4, a8;
       
   114 
       
   115     a1 = a & (0x1FFFFFFFFFFFFFFFULL); a2 = a1 << 1;
       
   116     a4 = a2 << 1; a8 = a4 << 1;
       
   117     tab[ 0] = 0;     tab[ 1] = a1;       tab[ 2] = a2;       tab[ 3] = a1^a2;
       
   118     tab[ 4] = a4;    tab[ 5] = a1^a4;    tab[ 6] = a2^a4;    tab[ 7] = a1^a2^a4;
       
   119     tab[ 8] = a8;    tab[ 9] = a1^a8;    tab[10] = a2^a8;    tab[11] = a1^a2^a8;
       
   120     tab[12] = a4^a8; tab[13] = a1^a4^a8; tab[14] = a2^a4^a8; tab[15] = a1^a2^a4^a8;
       
   121 
       
   122     s = tab[b       & 0xF]; l  = s;
       
   123     s = tab[b >>  4 & 0xF]; l ^= s <<  4; h  = s >> 60;
       
   124     s = tab[b >>  8 & 0xF]; l ^= s <<  8; h ^= s >> 56;
       
   125     s = tab[b >> 12 & 0xF]; l ^= s << 12; h ^= s >> 52;
       
   126     s = tab[b >> 16 & 0xF]; l ^= s << 16; h ^= s >> 48;
       
   127     s = tab[b >> 20 & 0xF]; l ^= s << 20; h ^= s >> 44;
       
   128     s = tab[b >> 24 & 0xF]; l ^= s << 24; h ^= s >> 40;
       
   129     s = tab[b >> 28 & 0xF]; l ^= s << 28; h ^= s >> 36;
       
   130     s = tab[b >> 32 & 0xF]; l ^= s << 32; h ^= s >> 32;
       
   131     s = tab[b >> 36 & 0xF]; l ^= s << 36; h ^= s >> 28;
       
   132     s = tab[b >> 40 & 0xF]; l ^= s << 40; h ^= s >> 24;
       
   133     s = tab[b >> 44 & 0xF]; l ^= s << 44; h ^= s >> 20;
       
   134     s = tab[b >> 48 & 0xF]; l ^= s << 48; h ^= s >> 16;
       
   135     s = tab[b >> 52 & 0xF]; l ^= s << 52; h ^= s >> 12;
       
   136     s = tab[b >> 56 & 0xF]; l ^= s << 56; h ^= s >>  8;
       
   137     s = tab[b >> 60      ]; l ^= s << 60; h ^= s >>  4;
       
   138 
       
   139     /* compensate for the top three bits of a */
       
   140 
       
   141     if (top3b & 01) { l ^= b << 61; h ^= b >> 3; }
       
   142     if (top3b & 02) { l ^= b << 62; h ^= b >> 2; }
       
   143     if (top3b & 04) { l ^= b << 63; h ^= b >> 1; }
       
   144 
       
   145     *rh = h; *rl = l;
       
   146 }
       
   147 #endif
       
   148 
       
   149 /* Compute xor-multiply of two binary polynomials  (a1, a0) x (b1, b0)
       
   150  * result is a binary polynomial in 4 mp_digits r[4].
       
   151  * The caller MUST ensure that r has the right amount of space allocated.
       
   152  */
       
   153 void
       
   154 s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
       
   155            const mp_digit b0)
       
   156 {
       
   157     mp_digit m1, m0;
       
   158     /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
       
   159     s_bmul_1x1(r+3, r+2, a1, b1);
       
   160     s_bmul_1x1(r+1, r, a0, b0);
       
   161     s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
       
   162     /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
       
   163     r[2] ^= m1 ^ r[1] ^ r[3];  /* h0 ^= m1 ^ l1 ^ h1; */
       
   164     r[1]  = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0;  /* l1 ^= l0 ^ h0 ^ m0; */
       
   165 }
       
   166 
       
   167 /* Compute xor-multiply of two binary polynomials  (a2, a1, a0) x (b2, b1, b0)
       
   168  * result is a binary polynomial in 6 mp_digits r[6].
       
   169  * The caller MUST ensure that r has the right amount of space allocated.
       
   170  */
       
   171 void
       
   172 s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
       
   173         const mp_digit b2, const mp_digit b1, const mp_digit b0)
       
   174 {
       
   175         mp_digit zm[4];
       
   176 
       
   177         s_bmul_1x1(r+5, r+4, a2, b2);         /* fill top 2 words */
       
   178         s_bmul_2x2(zm, a1, a2^a0, b1, b2^b0); /* fill middle 4 words */
       
   179         s_bmul_2x2(r, a1, a0, b1, b0);        /* fill bottom 4 words */
       
   180 
       
   181         zm[3] ^= r[3];
       
   182         zm[2] ^= r[2];
       
   183         zm[1] ^= r[1] ^ r[5];
       
   184         zm[0] ^= r[0] ^ r[4];
       
   185 
       
   186         r[5]  ^= zm[3];
       
   187         r[4]  ^= zm[2];
       
   188         r[3]  ^= zm[1];
       
   189         r[2]  ^= zm[0];
       
   190 }
       
   191 
       
   192 /* Compute xor-multiply of two binary polynomials  (a3, a2, a1, a0) x (b3, b2, b1, b0)
       
   193  * result is a binary polynomial in 8 mp_digits r[8].
       
   194  * The caller MUST ensure that r has the right amount of space allocated.
       
   195  */
       
   196 void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
       
   197         const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
       
   198         const mp_digit b0)
       
   199 {
       
   200         mp_digit zm[4];
       
   201 
       
   202         s_bmul_2x2(r+4, a3, a2, b3, b2);            /* fill top 4 words */
       
   203         s_bmul_2x2(zm, a3^a1, a2^a0, b3^b1, b2^b0); /* fill middle 4 words */
       
   204         s_bmul_2x2(r, a1, a0, b1, b0);              /* fill bottom 4 words */
       
   205 
       
   206         zm[3] ^= r[3] ^ r[7];
       
   207         zm[2] ^= r[2] ^ r[6];
       
   208         zm[1] ^= r[1] ^ r[5];
       
   209         zm[0] ^= r[0] ^ r[4];
       
   210 
       
   211         r[5]  ^= zm[3];
       
   212         r[4]  ^= zm[2];
       
   213         r[3]  ^= zm[1];
       
   214         r[2]  ^= zm[0];
       
   215 }
       
   216 
       
   217 /* Compute addition of two binary polynomials a and b,
       
   218  * store result in c; c could be a or b, a and b could be equal;
       
   219  * c is the bitwise XOR of a and b.
       
   220  */
       
   221 mp_err
       
   222 mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
       
   223 {
       
   224     mp_digit *pa, *pb, *pc;
       
   225     mp_size ix;
       
   226     mp_size used_pa, used_pb;
       
   227     mp_err res = MP_OKAY;
       
   228 
       
   229     /* Add all digits up to the precision of b.  If b had more
       
   230      * precision than a initially, swap a, b first
       
   231      */
       
   232     if (MP_USED(a) >= MP_USED(b)) {
       
   233         pa = MP_DIGITS(a);
       
   234         pb = MP_DIGITS(b);
       
   235         used_pa = MP_USED(a);
       
   236         used_pb = MP_USED(b);
       
   237     } else {
       
   238         pa = MP_DIGITS(b);
       
   239         pb = MP_DIGITS(a);
       
   240         used_pa = MP_USED(b);
       
   241         used_pb = MP_USED(a);
       
   242     }
       
   243 
       
   244     /* Make sure c has enough precision for the output value */
       
   245     MP_CHECKOK( s_mp_pad(c, used_pa) );
       
   246 
       
   247     /* Do word-by-word xor */
       
   248     pc = MP_DIGITS(c);
       
   249     for (ix = 0; ix < used_pb; ix++) {
       
   250         (*pc++) = (*pa++) ^ (*pb++);
       
   251     }
       
   252 
       
   253     /* Finish the rest of digits until we're actually done */
       
   254     for (; ix < used_pa; ++ix) {
       
   255         *pc++ = *pa++;
       
   256     }
       
   257 
       
   258     MP_USED(c) = used_pa;
       
   259     MP_SIGN(c) = ZPOS;
       
   260     s_mp_clamp(c);
       
   261 
       
   262 CLEANUP:
       
   263     return res;
       
   264 }
       
   265 
       
   266 #define s_mp_div2(a) MP_CHECKOK( mpl_rsh((a), (a), 1) );
       
   267 
       
   268 /* Compute binary polynomial multiply d = a * b */
       
   269 static void
       
   270 s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
       
   271 {
       
   272     mp_digit a_i, a0b0, a1b1, carry = 0;
       
   273     while (a_len--) {
       
   274         a_i = *a++;
       
   275         s_bmul_1x1(&a1b1, &a0b0, a_i, b);
       
   276         *d++ = a0b0 ^ carry;
       
   277         carry = a1b1;
       
   278     }
       
   279     *d = carry;
       
   280 }
       
   281 
       
   282 /* Compute binary polynomial xor multiply accumulate d ^= a * b */
       
   283 static void
       
   284 s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
       
   285 {
       
   286     mp_digit a_i, a0b0, a1b1, carry = 0;
       
   287     while (a_len--) {
       
   288         a_i = *a++;
       
   289         s_bmul_1x1(&a1b1, &a0b0, a_i, b);
       
   290         *d++ ^= a0b0 ^ carry;
       
   291         carry = a1b1;
       
   292     }
       
   293     *d ^= carry;
       
   294 }
       
   295 
       
   296 /* Compute binary polynomial xor multiply c = a * b.
       
   297  * All parameters may be identical.
       
   298  */
       
   299 mp_err
       
   300 mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
       
   301 {
       
   302     mp_digit *pb, b_i;
       
   303     mp_int tmp;
       
   304     mp_size ib, a_used, b_used;
       
   305     mp_err res = MP_OKAY;
       
   306 
       
   307     MP_DIGITS(&tmp) = 0;
       
   308 
       
   309     ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
       
   310 
       
   311     if (a == c) {
       
   312         MP_CHECKOK( mp_init_copy(&tmp, a) );
       
   313         if (a == b)
       
   314             b = &tmp;
       
   315         a = &tmp;
       
   316     } else if (b == c) {
       
   317         MP_CHECKOK( mp_init_copy(&tmp, b) );
       
   318         b = &tmp;
       
   319     }
       
   320 
       
   321     if (MP_USED(a) < MP_USED(b)) {
       
   322         const mp_int *xch = b;      /* switch a and b if b longer */
       
   323         b = a;
       
   324         a = xch;
       
   325     }
       
   326 
       
   327     MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
       
   328     MP_CHECKOK( s_mp_pad(c, USED(a) + USED(b)) );
       
   329 
       
   330     pb = MP_DIGITS(b);
       
   331     s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
       
   332 
       
   333     /* Outer loop:  Digits of b */
       
   334     a_used = MP_USED(a);
       
   335     b_used = MP_USED(b);
       
   336         MP_USED(c) = a_used + b_used;
       
   337     for (ib = 1; ib < b_used; ib++) {
       
   338         b_i = *pb++;
       
   339 
       
   340         /* Inner product:  Digits of a */
       
   341         if (b_i)
       
   342             s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib);
       
   343         else
       
   344             MP_DIGIT(c, ib + a_used) = b_i;
       
   345     }
       
   346 
       
   347     s_mp_clamp(c);
       
   348 
       
   349     SIGN(c) = ZPOS;
       
   350 
       
   351 CLEANUP:
       
   352     mp_clear(&tmp);
       
   353     return res;
       
   354 }
       
   355 
       
   356 
       
   357 /* Compute modular reduction of a and store result in r.
       
   358  * r could be a.
       
   359  * For modular arithmetic, the irreducible polynomial f(t) is represented
       
   360  * as an array of int[], where f(t) is of the form:
       
   361  *     f(t) = t^p[0] + t^p[1] + ... + t^p[k]
       
   362  * where m = p[0] > p[1] > ... > p[k] = 0.
       
   363  */
       
   364 mp_err
       
   365 mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
       
   366 {
       
   367     int j, k;
       
   368     int n, dN, d0, d1;
       
   369     mp_digit zz, *z, tmp;
       
   370     mp_size used;
       
   371     mp_err res = MP_OKAY;
       
   372 
       
   373     /* The algorithm does the reduction in place in r,
       
   374      * if a != r, copy a into r first so reduction can be done in r
       
   375      */
       
   376     if (a != r) {
       
   377         MP_CHECKOK( mp_copy(a, r) );
       
   378     }
       
   379     z = MP_DIGITS(r);
       
   380 
       
   381     /* start reduction */
       
   382     dN = p[0] / MP_DIGIT_BITS;
       
   383     used = MP_USED(r);
       
   384 
       
   385     for (j = used - 1; j > dN;) {
       
   386 
       
   387         zz = z[j];
       
   388         if (zz == 0) {
       
   389             j--; continue;
       
   390         }
       
   391         z[j] = 0;
       
   392 
       
   393         for (k = 1; p[k] > 0; k++) {
       
   394             /* reducing component t^p[k] */
       
   395             n = p[0] - p[k];
       
   396             d0 = n % MP_DIGIT_BITS;
       
   397             d1 = MP_DIGIT_BITS - d0;
       
   398             n /= MP_DIGIT_BITS;
       
   399             z[j-n] ^= (zz>>d0);
       
   400             if (d0)
       
   401                 z[j-n-1] ^= (zz<<d1);
       
   402         }
       
   403 
       
   404         /* reducing component t^0 */
       
   405         n = dN;
       
   406         d0 = p[0] % MP_DIGIT_BITS;
       
   407         d1 = MP_DIGIT_BITS - d0;
       
   408         z[j-n] ^= (zz >> d0);
       
   409         if (d0)
       
   410             z[j-n-1] ^= (zz << d1);
       
   411 
       
   412     }
       
   413 
       
   414     /* final round of reduction */
       
   415     while (j == dN) {
       
   416 
       
   417         d0 = p[0] % MP_DIGIT_BITS;
       
   418         zz = z[dN] >> d0;
       
   419         if (zz == 0) break;
       
   420         d1 = MP_DIGIT_BITS - d0;
       
   421 
       
   422         /* clear up the top d1 bits */
       
   423         if (d0) z[dN] = (z[dN] << d1) >> d1;
       
   424         *z ^= zz; /* reduction t^0 component */
       
   425 
       
   426         for (k = 1; p[k] > 0; k++) {
       
   427             /* reducing component t^p[k]*/
       
   428             n = p[k] / MP_DIGIT_BITS;
       
   429             d0 = p[k] % MP_DIGIT_BITS;
       
   430             d1 = MP_DIGIT_BITS - d0;
       
   431             z[n] ^= (zz << d0);
       
   432             tmp = zz >> d1;
       
   433             if (d0 && tmp)
       
   434                 z[n+1] ^= tmp;
       
   435         }
       
   436     }
       
   437 
       
   438     s_mp_clamp(r);
       
   439 CLEANUP:
       
   440     return res;
       
   441 }
       
   442 
       
   443 /* Compute the product of two polynomials a and b, reduce modulo p,
       
   444  * Store the result in r.  r could be a or b; a could be b.
       
   445  */
       
   446 mp_err
       
   447 mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
       
   448 {
       
   449     mp_err res;
       
   450 
       
   451     if (a == b) return mp_bsqrmod(a, p, r);
       
   452     if ((res = mp_bmul(a, b, r) ) != MP_OKAY)
       
   453         return res;
       
   454     return mp_bmod(r, p, r);
       
   455 }
       
   456 
       
   457 /* Compute binary polynomial squaring c = a*a mod p .
       
   458  * Parameter r and a can be identical.
       
   459  */
       
   460 
       
   461 mp_err
       
   462 mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
       
   463 {
       
   464     mp_digit *pa, *pr, a_i;
       
   465     mp_int tmp;
       
   466     mp_size ia, a_used;
       
   467     mp_err res;
       
   468 
       
   469     ARGCHK(a != NULL && r != NULL, MP_BADARG);
       
   470     MP_DIGITS(&tmp) = 0;
       
   471 
       
   472     if (a == r) {
       
   473         MP_CHECKOK( mp_init_copy(&tmp, a) );
       
   474         a = &tmp;
       
   475     }
       
   476 
       
   477     MP_USED(r) = 1; MP_DIGIT(r, 0) = 0;
       
   478     MP_CHECKOK( s_mp_pad(r, 2*USED(a)) );
       
   479 
       
   480     pa = MP_DIGITS(a);
       
   481     pr = MP_DIGITS(r);
       
   482     a_used = MP_USED(a);
       
   483         MP_USED(r) = 2 * a_used;
       
   484 
       
   485     for (ia = 0; ia < a_used; ia++) {
       
   486         a_i = *pa++;
       
   487         *pr++ = gf2m_SQR0(a_i);
       
   488         *pr++ = gf2m_SQR1(a_i);
       
   489     }
       
   490 
       
   491     MP_CHECKOK( mp_bmod(r, p, r) );
       
   492     s_mp_clamp(r);
       
   493     SIGN(r) = ZPOS;
       
   494 
       
   495 CLEANUP:
       
   496     mp_clear(&tmp);
       
   497     return res;
       
   498 }
       
   499 
       
   500 /* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
       
   501  * Store the result in r. r could be x or y, and x could equal y.
       
   502  * Uses algorithm Modular_Division_GF(2^m) from
       
   503  *     Chang-Shantz, S.  "From Euclid's GCD to Montgomery Multiplication to
       
   504  *     the Great Divide".
       
   505  */
       
   506 int
       
   507 mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
       
   508     const unsigned int p[], mp_int *r)
       
   509 {
       
   510     mp_int aa, bb, uu;
       
   511     mp_int *a, *b, *u, *v;
       
   512     mp_err res = MP_OKAY;
       
   513 
       
   514     MP_DIGITS(&aa) = 0;
       
   515     MP_DIGITS(&bb) = 0;
       
   516     MP_DIGITS(&uu) = 0;
       
   517 
       
   518     MP_CHECKOK( mp_init_copy(&aa, x) );
       
   519     MP_CHECKOK( mp_init_copy(&uu, y) );
       
   520     MP_CHECKOK( mp_init_copy(&bb, pp) );
       
   521     MP_CHECKOK( s_mp_pad(r, USED(pp)) );
       
   522     MP_USED(r) = 1; MP_DIGIT(r, 0) = 0;
       
   523 
       
   524     a = &aa; b= &bb; u=&uu; v=r;
       
   525     /* reduce x and y mod p */
       
   526     MP_CHECKOK( mp_bmod(a, p, a) );
       
   527     MP_CHECKOK( mp_bmod(u, p, u) );
       
   528 
       
   529     while (!mp_isodd(a)) {
       
   530         s_mp_div2(a);
       
   531         if (mp_isodd(u)) {
       
   532             MP_CHECKOK( mp_badd(u, pp, u) );
       
   533         }
       
   534         s_mp_div2(u);
       
   535     }
       
   536 
       
   537     do {
       
   538         if (mp_cmp_mag(b, a) > 0) {
       
   539             MP_CHECKOK( mp_badd(b, a, b) );
       
   540             MP_CHECKOK( mp_badd(v, u, v) );
       
   541             do {
       
   542                 s_mp_div2(b);
       
   543                 if (mp_isodd(v)) {
       
   544                     MP_CHECKOK( mp_badd(v, pp, v) );
       
   545                 }
       
   546                 s_mp_div2(v);
       
   547             } while (!mp_isodd(b));
       
   548         }
       
   549         else if ((MP_DIGIT(a,0) == 1) && (MP_USED(a) == 1))
       
   550             break;
       
   551         else {
       
   552             MP_CHECKOK( mp_badd(a, b, a) );
       
   553             MP_CHECKOK( mp_badd(u, v, u) );
       
   554             do {
       
   555                 s_mp_div2(a);
       
   556                 if (mp_isodd(u)) {
       
   557                     MP_CHECKOK( mp_badd(u, pp, u) );
       
   558                 }
       
   559                 s_mp_div2(u);
       
   560             } while (!mp_isodd(a));
       
   561         }
       
   562     } while (1);
       
   563 
       
   564     MP_CHECKOK( mp_copy(u, r) );
       
   565 
       
   566 CLEANUP:
       
   567     /* XXX this appears to be a memory leak in the NSS code */
       
   568     mp_clear(&aa);
       
   569     mp_clear(&bb);
       
   570     mp_clear(&uu);
       
   571     return res;
       
   572 
       
   573 }
       
   574 
       
   575 /* Convert the bit-string representation of a polynomial a into an array
       
   576  * of integers corresponding to the bits with non-zero coefficient.
       
   577  * Up to max elements of the array will be filled.  Return value is total
       
   578  * number of coefficients that would be extracted if array was large enough.
       
   579  */
       
   580 int
       
   581 mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
       
   582 {
       
   583     int i, j, k;
       
   584     mp_digit top_bit, mask;
       
   585 
       
   586     top_bit = 1;
       
   587     top_bit <<= MP_DIGIT_BIT - 1;
       
   588 
       
   589     for (k = 0; k < max; k++) p[k] = 0;
       
   590     k = 0;
       
   591 
       
   592     for (i = MP_USED(a) - 1; i >= 0; i--) {
       
   593         mask = top_bit;
       
   594         for (j = MP_DIGIT_BIT - 1; j >= 0; j--) {
       
   595             if (MP_DIGITS(a)[i] & mask) {
       
   596                 if (k < max) p[k] = MP_DIGIT_BIT * i + j;
       
   597                 k++;
       
   598             }
       
   599             mask >>= 1;
       
   600         }
       
   601     }
       
   602 
       
   603     return k;
       
   604 }
       
   605 
       
   606 /* Convert the coefficient array representation of a polynomial to a
       
   607  * bit-string.  The array must be terminated by 0.
       
   608  */
       
   609 mp_err
       
   610 mp_barr2poly(const unsigned int p[], mp_int *a)
       
   611 {
       
   612 
       
   613     mp_err res = MP_OKAY;
       
   614     int i;
       
   615 
       
   616     mp_zero(a);
       
   617     for (i = 0; p[i] > 0; i++) {
       
   618         MP_CHECKOK( mpl_set_bit(a, p[i], 1) );
       
   619     }
       
   620     MP_CHECKOK( mpl_set_bit(a, 0, 1) );
       
   621 
       
   622 CLEANUP:
       
   623     return res;
       
   624 }