@@ -191,27 +191,64 @@ int fe_inv__distinct_nb(byte *r, const byte *x, fe_inv__distinct_nb_ctx_t* ctx)
191191 fe_mul__distinct (ctx -> s , x , x );
192192 fe_mul__distinct (r , ctx -> s , x );
193193 ctx -> i = 0 ;
194+ ctx -> subState = 0 ;
194195 ctx -> state = 1 ;
195196 break ;
196197 case 1 :
197- if ((ctx -> i )++ < 248 ) {
198- fe_mul__distinct (ctx -> s , r , r );
199- fe_mul__distinct (r , ctx -> s , x );
198+ if (ctx -> i < 248 ) {
199+ if (ctx -> subState == 0 ) {
200+ fe_mul__distinct (ctx -> s , r , r );
201+ ctx -> subState = 1 ;
202+ }
203+ else {
204+ fe_mul__distinct (r , ctx -> s , x );
205+ ctx -> subState = 0 ;
206+ ++ (ctx -> i );
207+ }
200208 }
201209 else {
202210 ctx -> state = 2 ;
211+ ctx -> subState = 0 ;
203212 }
204213 break ;
205214 case 2 :
206- fe_mul__distinct (ctx -> s , r , r );
207- fe_mul__distinct (r , ctx -> s , ctx -> s );
208- fe_mul__distinct (ctx -> s , r , x );
209- fe_mul__distinct (r , ctx -> s , ctx -> s );
210- fe_mul__distinct (ctx -> s , r , r );
211- fe_mul__distinct (r , ctx -> s , x );
212- fe_mul__distinct (ctx -> s , r , r );
213- fe_mul__distinct (r , ctx -> s , x );
214- ret = 0 ;
215+ switch (ctx -> subState ) {
216+ case 0 :
217+ fe_mul__distinct (ctx -> s , r , r );
218+ ctx -> subState = 1 ;
219+ break ;
220+ case 1 :
221+ fe_mul__distinct (r , ctx -> s , ctx -> s );
222+ ctx -> subState = 2 ;
223+ break ;
224+ case 2 :
225+ fe_mul__distinct (ctx -> s , r , x );
226+ ctx -> subState = 3 ;
227+ break ;
228+ case 3 :
229+ fe_mul__distinct (r , ctx -> s , ctx -> s );
230+ ctx -> subState = 4 ;
231+ break ;
232+ case 4 :
233+ fe_mul__distinct (ctx -> s , r , r );
234+ ctx -> subState = 5 ;
235+ break ;
236+ case 5 :
237+ fe_mul__distinct (r , ctx -> s , x );
238+ ctx -> subState = 6 ;
239+ break ;
240+ case 6 :
241+ fe_mul__distinct (ctx -> s , r , r );
242+ ctx -> subState = 7 ;
243+ break ;
244+ case 7 :
245+ fe_mul__distinct (r , ctx -> s , x );
246+ ret = 0 ;
247+ break ;
248+ default :
249+ ctx -> subState = 0 ;
250+ break ;
251+ }
215252 break ;
216253 }
217254
@@ -237,33 +274,134 @@ int curve25519_nb(byte *result, const byte *n, const byte *p,
237274 XMEMSET (ctx -> zm1 , 0 , sizeof (ctx -> zm1 ));
238275 lm_copy (ctx -> xm , p );
239276 ctx -> i = 253 ;
277+ ctx -> subState = 0 ;
240278 ctx -> state = 1 ;
241279 break ;
242280 case 1 :
243281 if (ctx -> i >= 0 ) {
244- const int bit = (n [ctx -> i >> 3 ] >> (ctx -> i & 7 )) & 1 ;
245- byte xms [F25519_SIZE ];
246- byte zms [F25519_SIZE ];
247-
248- /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
249- xc_diffadd (ctx -> xm1 , ctx -> zm1 , p , f25519_one , ctx -> xm , ctx -> zm ,
250- ctx -> xm1 , ctx -> zm1 );
251- xc_double (ctx -> xm , ctx -> zm , ctx -> xm , ctx -> zm );
252-
253- /* Compute P_(2m+1) */
254- xc_diffadd (xms , zms , ctx -> xm1 , ctx -> zm1 , ctx -> xm ,
255- ctx -> zm , p , f25519_one );
256-
257- /* Select:
258- * bit = 1 --> (P_(2m+1), P_(2m))
259- * bit = 0 --> (P_(2m), P_(2m-1))
260- */
261- fe_select (ctx -> xm1 , ctx -> xm1 , ctx -> xm , bit );
262- fe_select (ctx -> zm1 , ctx -> zm1 , ctx -> zm , bit );
263- fe_select (ctx -> xm , ctx -> xm , xms , bit );
264- fe_select (ctx -> zm , ctx -> zm , zms , bit );
265-
266- -- (ctx -> i );
282+ switch (ctx -> subState ) {
283+ case 0 :
284+ ctx -> bit = (n [ctx -> i >> 3 ] >> (ctx -> i & 7 )) & 1 ;
285+ /* Diffadd step 1 */
286+ lm_add (ctx -> a , ctx -> xm , ctx -> zm );
287+ lm_sub (ctx -> b , ctx -> xm1 , ctx -> zm1 );
288+ fe_mul__distinct (ctx -> da , ctx -> a , ctx -> b );
289+ ctx -> subState = 1 ;
290+ break ;
291+ case 1 :
292+ /* Diffadd step 2 */
293+ lm_sub (ctx -> b , ctx -> xm , ctx -> zm );
294+ lm_add (ctx -> a , ctx -> xm1 , ctx -> zm1 );
295+ fe_mul__distinct (ctx -> cb , ctx -> a , ctx -> b );
296+ ctx -> subState = 2 ;
297+ break ;
298+ case 2 :
299+ /* Diffadd step 3 */
300+ lm_add (ctx -> a , ctx -> da , ctx -> cb );
301+ fe_mul__distinct (ctx -> b , ctx -> a , ctx -> a );
302+ ctx -> subState = 3 ;
303+ break ;
304+ case 3 :
305+ /* Diffadd step 4 */
306+ fe_mul__distinct (ctx -> xm1 , f25519_one , ctx -> b );
307+ ctx -> subState = 4 ;
308+ break ;
309+ case 4 :
310+ /* Diffadd step 5 */
311+ lm_sub (ctx -> a , ctx -> da , ctx -> cb );
312+ fe_mul__distinct (ctx -> b , ctx -> a , ctx -> a );
313+ ctx -> subState = 5 ;
314+ break ;
315+ case 5 :
316+ /* Diffadd step 6 */
317+ fe_mul__distinct (ctx -> zm1 , p , ctx -> b );
318+ ctx -> subState = 6 ;
319+ break ;
320+ case 6 :
321+ /* Double step 1 */
322+ fe_mul__distinct (ctx -> x1sq , ctx -> xm , ctx -> xm );
323+ ctx -> subState = 7 ;
324+ break ;
325+ case 7 :
326+ /* Double step 2 */
327+ fe_mul__distinct (ctx -> z1sq , ctx -> zm , ctx -> zm );
328+ ctx -> subState = 8 ;
329+ break ;
330+ case 8 :
331+ /* Double step 3 */
332+ fe_mul__distinct (ctx -> x1z1 , ctx -> xm , ctx -> zm );
333+ ctx -> subState = 9 ;
334+ break ;
335+ case 9 :
336+ /* Double step 4 */
337+ lm_sub (ctx -> a , ctx -> x1sq , ctx -> z1sq );
338+ fe_mul__distinct (ctx -> xm , ctx -> a , ctx -> a );
339+ ctx -> subState = 10 ;
340+ break ;
341+ case 10 :
342+ /* Double step 5 */
343+ fe_mul_c (ctx -> a , ctx -> x1z1 , 486662 );
344+ lm_add (ctx -> a , ctx -> x1sq , ctx -> a );
345+ lm_add (ctx -> a , ctx -> z1sq , ctx -> a );
346+ fe_mul__distinct (ctx -> x1sq , ctx -> x1z1 , ctx -> a );
347+ ctx -> subState = 11 ;
348+ break ;
349+ case 11 :
350+ fe_mul_c (ctx -> zm , ctx -> x1sq , 4 );
351+ ctx -> subState = 12 ;
352+ break ;
353+ case 12 :
354+ /* Diffadd2 step 1 */
355+ lm_add (ctx -> a , ctx -> xm , ctx -> zm );
356+ lm_sub (ctx -> b , p , f25519_one );
357+ fe_mul__distinct (ctx -> da , ctx -> a , ctx -> b );
358+ ctx -> subState = 13 ;
359+ break ;
360+ case 13 :
361+ /* Diffadd2 step 2 */
362+ lm_sub (ctx -> b , ctx -> xm , ctx -> zm );
363+ lm_add (ctx -> a , p , f25519_one );
364+ fe_mul__distinct (ctx -> cb , ctx -> a , ctx -> b );
365+ ctx -> subState = 14 ;
366+ break ;
367+ case 14 :
368+ /* Diffadd2 step 3 */
369+ lm_add (ctx -> a , ctx -> da , ctx -> cb );
370+ fe_mul__distinct (ctx -> b , ctx -> a , ctx -> a );
371+ ctx -> subState = 15 ;
372+ break ;
373+ case 15 :
374+ /* Diffadd2 step 4 */
375+ fe_mul__distinct (ctx -> xms , ctx -> zm1 , ctx -> b );
376+ ctx -> subState = 16 ;
377+ break ;
378+ case 16 :
379+ /* Diffadd2 step 5 */
380+ lm_sub (ctx -> a , ctx -> da , ctx -> cb );
381+ fe_mul__distinct (ctx -> b , ctx -> a , ctx -> a );
382+ ctx -> subState = 17 ;
383+ break ;
384+ case 17 :
385+ /* Diffadd2 step 6 */
386+ fe_mul__distinct (ctx -> zms , ctx -> xm1 , ctx -> b );
387+ ctx -> subState = 18 ;
388+ break ;
389+ case 18 :
390+ /* Select:
391+ * bit = 1 --> (P_(2m+1), P_(2m))
392+ * bit = 0 --> (P_(2m), P_(2m-1))
393+ */
394+ fe_select (ctx -> xm1 , ctx -> xm1 , ctx -> xm , ctx -> bit );
395+ fe_select (ctx -> zm1 , ctx -> zm1 , ctx -> zm , ctx -> bit );
396+ fe_select (ctx -> xm , ctx -> xm , ctx -> xms , ctx -> bit );
397+ fe_select (ctx -> zm , ctx -> zm , ctx -> zms , ctx -> bit );
398+ -- (ctx -> i );
399+ ctx -> subState = 0 ;
400+ break ;
401+ default :
402+ ctx -> subState = 0 ;
403+ break ;
404+ }
267405 }
268406 else {
269407 ctx -> state = 2 ;
0 commit comments