diff --git a/.github/workflows/os-check.yml b/.github/workflows/os-check.yml index c209b6505d5..c74648bb368 100644 --- a/.github/workflows/os-check.yml +++ b/.github/workflows/os-check.yml @@ -106,7 +106,14 @@ jobs: 'CPPFLAGS=-DNO_WOLFSSL_CLIENT', 'CPPFLAGS=-DNO_WOLFSSL_SERVER', '--enable-lms=small,verify-only --enable-xmss=small,verify-only', - '--enable-curve25519=nonblock --enable-ecc=nonblock --enable-sp=yes,nonblock CPPFLAGS="-DWOLFSSL_PUBLIC_MP -DWOLFSSL_DEBUG_NONBLOCK"', + # Non-blocking ECC + Curve25519 + RSA + DH on the default SP word + # size for the host (sp_c64.c on x86_64). RSA/DH non-block require + # RSA_LOW_MEM (CRT path is not supported in non-block mode). + '--enable-curve25519=nonblock --enable-ecc=nonblock --enable-rsa=nonblock --enable-dh=nonblock --enable-sp=yes,nonblock CPPFLAGS="-DWOLFSSL_PUBLIC_MP -DWOLFSSL_DEBUG_NONBLOCK -DRSA_LOW_MEM"', + # Same configuration but force SP_WORD_SIZE=32 to exercise sp_c32.c + # on a 64-bit host. The two builds together cover both generated + # variants of the sp_*_mod_exp_*_nb / RSA / DH wrappers. + '--enable-curve25519=nonblock --enable-ecc=nonblock --enable-rsa=nonblock --enable-dh=nonblock --enable-sp=yes,nonblock CPPFLAGS="-DWOLFSSL_PUBLIC_MP -DWOLFSSL_DEBUG_NONBLOCK -DRSA_LOW_MEM -DSP_WORD_SIZE=32"', '--enable-certreq --enable-certext --enable-certgen --disable-secure-renegotiation-info CPPFLAGS="-DNO_TLS"', ] name: make check linux diff --git a/configure.ac b/configure.ac index 786514eea32..8a6e2a6f0d1 100644 --- a/configure.ac +++ b/configure.ac @@ -4880,6 +4880,15 @@ then test -z "$enable_asynccrypt_sw" && enable_asynccrypt_sw=yes fi +# Handle RSA/DH nonblock - the SP non-blocking dispatch wants the same +# WOLFSSL_ASYNC_CRYPT_SW shim that ECC/Curve25519 nonblock use so the +# TLS layer can manage per-SSL nb contexts and yield MP_WOULDBLOCK. 
+if test "$enable_rsa" = "nonblock" || test "$enable_dh" = "nonblock" +then + test -z "$enable_asynccrypt" && enable_asynccrypt=yes + test -z "$enable_asynccrypt_sw" && enable_asynccrypt_sw=yes +fi + if test "$ENABLED_CURVE25519" = "no" && test "$ENABLED_QUIC" = "yes" && test "$ENABLED_FIPS" = "no" then ENABLED_CURVE25519=yes @@ -5396,7 +5405,7 @@ fi # RSA AC_ARG_ENABLE([rsa], - [AS_HELP_STRING([--enable-rsa],[Enable RSA (default: enabled)])], + [AS_HELP_STRING([--enable-rsa],[Enable RSA (default: enabled). Set to "nonblock" to enable non-blocking RSA via TFM fp_exptmod_nb or SP small mod_exp_nb])], [ ENABLED_RSA=$enableval ], [ ENABLED_RSA=yes ] ) @@ -5404,6 +5413,17 @@ AC_ARG_ENABLE([rsa], if test "$ENABLED_RSA" = "no" then AM_CFLAGS="$AM_CFLAGS -DNO_RSA" +elif test "$ENABLED_RSA" = "nonblock" +then + AM_CFLAGS="$AM_CFLAGS -DWC_RSA_NONBLOCK" + ENABLED_RSA=yes + ENABLED_CERTS=yes + # asynccrypt + asynccrypt-sw are auto-enabled earlier in this file when + # --enable-rsa=nonblock is detected, so the TLS layer can pick up the + # per-SSL nb context and yield MP_WOULDBLOCK. RSA_LOW_MEM is left as a + # user choice - the SP non-block backend's compile-time check in + # wolfssl/wolfcrypt/rsa.h enforces it for SP, while the TFM (fastmath) + # backend supports the CRT path without it. else # turn off RSA if leanpsk or leantls on if test "$ENABLED_LEANPSK" = "yes" || test "$ENABLED_LEANTLS" = "yes" @@ -5483,7 +5503,7 @@ fi # DH AC_ARG_ENABLE([dh], - [AS_HELP_STRING([--enable-dh],[Enable DH (default: enabled)])], + [AS_HELP_STRING([--enable-dh],[Enable DH (default: enabled). Set to "nonblock" to enable non-blocking DH key agreement via SP small mod_exp_nb])], [ ENABLED_DH=$enableval ], [ ENABLED_DH=yes ] ) @@ -5496,6 +5516,11 @@ fi if test "$ENABLED_DH" = "no" then AM_CFLAGS="$AM_CFLAGS -DNO_DH" +elif test "$ENABLED_DH" = "nonblock" +then + AM_CFLAGS="$AM_CFLAGS -DWC_DH_NONBLOCK" + ENABLED_DH=yes + # asynccrypt + asynccrypt-sw are auto-enabled earlier in this file. 
else # turn off DH if leanpsk or leantls on if test "$ENABLED_LEANPSK" = "yes" || test "$ENABLED_LEANTLS" = "yes" diff --git a/src/internal.c b/src/internal.c index 6dc0cbe2d1f..119afe4ca3b 100644 --- a/src/internal.c +++ b/src/internal.c @@ -8300,6 +8300,15 @@ void FreeKey(WOLFSSL* ssl, int type, void** pKey) switch (type) { #ifndef NO_RSA case DYNAMIC_TYPE_RSA: + #if defined(WC_RSA_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) && \ + defined(WC_ASYNC_ENABLE_RSA) + if (((RsaKey*)*pKey)->nb != NULL) { + XFREE(((RsaKey*)*pKey)->nb, ssl->heap, + DYNAMIC_TYPE_TMP_BUFFER); + ((RsaKey*)*pKey)->nb = NULL; + } + #endif /* WC_RSA_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW && + WC_ASYNC_ENABLE_RSA */ wc_FreeRsaKey((RsaKey*)*pKey); break; #endif /* ! NO_RSA */ @@ -8355,6 +8364,15 @@ void FreeKey(WOLFSSL* ssl, int type, void** pKey) #endif /* HAVE_DILITHIUM */ #ifndef NO_DH case DYNAMIC_TYPE_DH: + #if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) && \ + defined(WC_ASYNC_ENABLE_DH) + if (((DhKey*)*pKey)->nb != NULL) { + XFREE(((DhKey*)*pKey)->nb, ssl->heap, + DYNAMIC_TYPE_TMP_BUFFER); + ((DhKey*)*pKey)->nb = NULL; + } + #endif /* WC_DH_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW && + WC_ASYNC_ENABLE_DH */ wc_FreeDhKey((DhKey*)*pKey); break; #endif /* !NO_DH */ @@ -8385,6 +8403,14 @@ int AllocKey(WOLFSSL* ssl, int type, void** pKey) #if defined(WC_X25519_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) x25519_nb_ctx_t* x25519NbCtx; #endif /* WC_X25519_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW */ +#if !defined(NO_RSA) && defined(WC_RSA_NONBLOCK) && \ + defined(WOLFSSL_ASYNC_CRYPT_SW) && defined(WC_ASYNC_ENABLE_RSA) + RsaNb* rsaNb; +#endif +#if !defined(NO_DH) && defined(WC_DH_NONBLOCK) && \ + defined(WOLFSSL_ASYNC_CRYPT_SW) && defined(WC_ASYNC_ENABLE_DH) + DhNb* dhNb; +#endif if (ssl == NULL || pKey == NULL) { return BAD_FUNC_ARG; @@ -8464,6 +8490,26 @@ int AllocKey(WOLFSSL* ssl, int type, void** pKey) #ifndef NO_RSA case DYNAMIC_TYPE_RSA: ret = wc_InitRsaKey_ex((RsaKey*)*pKey, ssl->heap, 
ssl->devId); + #if defined(WC_RSA_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) && \ + defined(WC_ASYNC_ENABLE_RSA) + /* Only set non-blocking context when async device is active. With + * INVALID_DEVID there is no async loop to retry on MP_WOULDBLOCK, so + * skip non-blocking setup and use blocking mode instead. */ + if (ret == 0 && ssl->devId != INVALID_DEVID) { + rsaNb = (RsaNb*)XMALLOC(sizeof(RsaNb), ssl->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (rsaNb == NULL) { + ret = MEMORY_E; + } + else { + ret = wc_RsaSetNonBlock((RsaKey*)*pKey, rsaNb); + if (ret != 0) { + XFREE(rsaNb, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + } + } + #endif /* WC_RSA_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW && + WC_ASYNC_ENABLE_RSA */ break; #endif /* ! NO_RSA */ #ifdef HAVE_ECC @@ -8551,6 +8597,26 @@ int AllocKey(WOLFSSL* ssl, int type, void** pKey) #ifndef NO_DH case DYNAMIC_TYPE_DH: ret = wc_InitDhKey_ex((DhKey*)*pKey, ssl->heap, ssl->devId); + #if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) && \ + defined(WC_ASYNC_ENABLE_DH) + /* Only set non-blocking context when async device is active. With + * INVALID_DEVID there is no async loop to retry on MP_WOULDBLOCK, so + * skip non-blocking setup and use blocking mode instead. 
*/ + if (ret == 0 && ssl->devId != INVALID_DEVID) { + dhNb = (DhNb*)XMALLOC(sizeof(DhNb), ssl->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (dhNb == NULL) { + ret = MEMORY_E; + } + else { + ret = wc_DhSetNonBlock((DhKey*)*pKey, dhNb); + if (ret != 0) { + XFREE(dhNb, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + } + } + #endif /* WC_DH_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW && + WC_ASYNC_ENABLE_DH */ break; #endif /* !NO_DH */ default: diff --git a/src/tls.c b/src/tls.c index 7f7e0b0d1bf..48080c99625 100644 --- a/src/tls.c +++ b/src/tls.c @@ -8010,6 +8010,26 @@ static int TLSX_KeyShare_GenDhKey(WOLFSSL *ssl, KeyShareEntry* kse) ret = wc_DhSetNamedKey(dhKey, kse->group); #endif } + #if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) && \ + defined(WC_ASYNC_ENABLE_DH) + /* Only set non-blocking context when async device is active. With + * INVALID_DEVID there is no async loop to retry on MP_WOULDBLOCK, so + * skip non-blocking setup and use blocking mode instead. */ + if (ret == 0 && ssl->devId != INVALID_DEVID) { + DhNb* dhNb = (DhNb*)XMALLOC(sizeof(DhNb), ssl->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (dhNb == NULL) { + ret = MEMORY_E; + } + else { + ret = wc_DhSetNonBlock((DhKey*)kse->key, dhNb); + if (ret != 0) { + XFREE(dhNb, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + } + } + #endif /* WC_DH_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW && + WC_ASYNC_ENABLE_DH */ } /* Allocate space for the private and public key */ @@ -9297,6 +9317,26 @@ static int TLSX_KeyShare_ProcessDh(WOLFSSL* ssl, KeyShareEntry* keyShareEntry) ret = wc_DhSetNamedKey(dhKey, keyShareEntry->group); #endif } + #if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_ASYNC_CRYPT_SW) && \ + defined(WC_ASYNC_ENABLE_DH) + /* Only set non-blocking context when async device is active. With + * INVALID_DEVID there is no async loop to retry on MP_WOULDBLOCK, so + * skip non-blocking setup and use blocking mode instead. 
*/ + if (ret == 0 && ssl->devId != INVALID_DEVID) { + DhNb* dhNb = (DhNb*)XMALLOC(sizeof(DhNb), ssl->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (dhNb == NULL) { + ret = MEMORY_E; + } + else { + ret = wc_DhSetNonBlock((DhKey*)keyShareEntry->key, dhNb); + if (ret != 0) { + XFREE(dhNb, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + } + } + #endif /* WC_DH_NONBLOCK && WOLFSSL_ASYNC_CRYPT_SW && + WC_ASYNC_ENABLE_DH */ } if (ret == 0 diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c index 5e2863c343c..8d6164b8996 100644 --- a/wolfcrypt/src/dh.c +++ b/wolfcrypt/src/dh.c @@ -972,6 +972,10 @@ int wc_InitDhKey_ex(DhKey* key, void* heap, int devId) key->handle = NULL; #endif +#ifdef WC_DH_NONBLOCK + key->nb = NULL; +#endif + return ret; } @@ -980,6 +984,23 @@ int wc_InitDhKey(DhKey* key) return wc_InitDhKey_ex(key, NULL, INVALID_DEVID); } +#ifdef WC_DH_NONBLOCK +int wc_DhSetNonBlock(DhKey* key, DhNb* nb) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + if (nb != NULL) { + XMEMSET(nb, 0, sizeof(DhNb)); + } + + /* Pass NULL to disable non-blocking mode. */ + key->nb = nb; + + return 0; +} +#endif + int wc_FreeDhKey(DhKey* key) { @@ -2043,6 +2064,36 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, return DH_CHECK_PUB_E; } +#if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_HAVE_SP_DH) && \ + defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) + /* Non-blocking dispatch bypasses the mp_int dance entirely - the SP + * wrapper takes byte buffers and persists across yields. The constant- + * time fold-back (ct branch) is intentionally not applied here; nb + * callers should use the standard wc_DhAgree(). 
*/ + if (key->nb != NULL && !ct) { + #ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(&key->p) == 2048) { + return sp_DhExp_2048_nb(&key->nb->sp_ctx, otherPub, pubSz, + priv, privSz, &key->p, agree, agreeSz); + } + #endif + #ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(&key->p) == 3072) { + return sp_DhExp_3072_nb(&key->nb->sp_ctx, otherPub, pubSz, + priv, privSz, &key->p, agree, agreeSz); + } + #endif + #ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) { + return sp_DhExp_4096_nb(&key->nb->sp_ctx, otherPub, pubSz, + priv, privSz, &key->p, agree, agreeSz); + } + #endif + /* size not nb-supported - fall through to blocking path */ + } +#endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); if (y == NULL) @@ -2304,6 +2355,10 @@ int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv, ret = KcapiDh_SharedSecret(key, otherPub, pubSz, agree, agreeSz); #else #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) + /* Async marker takes precedence: when wc_AsyncSimulate re-enters the + * compute path, wc_DhAgree_Async dispatches to the SP nonblock wrapper + * if key->nb is attached, and per-yield MP_WOULDBLOCK is translated to + * WC_PENDING_E by wc_AsyncSimulate so the TLS event loop drives it. */ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) { ret = wc_DhAgree_Async(key, agree, agreeSz, priv, privSz, otherPub, pubSz); @@ -2311,6 +2366,9 @@ int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv, else #endif { + /* wc_DhAgree_Sync handles key->nb internally; no separate dispatch + * needed here. wc_DhAgree_ct (constant-time fold-back) bypasses + * this function entirely so passing ct=0 is correct. 
*/ ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz, 0); } diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index b3fbb83fe0f..02e0bb05c89 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -2288,12 +2288,63 @@ static int wc_RsaFunctionNonBlock(const byte* in, word32 inLen, byte* out, word32* outLen, int type, RsaKey* key) { int ret = 0; +#ifdef USE_FAST_MATH word32 keyLen, len; +#endif +#if defined(WOLFSSL_HAVE_SP_RSA) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) + int bits; +#endif if (key == NULL || key->nb == NULL) { return BAD_FUNC_ARG; } +#if defined(WOLFSSL_HAVE_SP_RSA) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) + bits = mp_count_bits(&key->n); +#ifndef WOLFSSL_SP_NO_2048 + if (bits == 2048) { + if (type == RSA_PUBLIC_ENCRYPT || type == RSA_PUBLIC_DECRYPT) { + return sp_RsaPublic_2048_nb(&key->nb->sp_ctx, in, inLen, + &key->e, &key->n, out, outLen); + } + #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) + return sp_RsaPrivate_2048_nb(&key->nb->sp_ctx, in, inLen, + &key->d, &key->n, out, outLen); + #endif + } +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (bits == 3072) { + if (type == RSA_PUBLIC_ENCRYPT || type == RSA_PUBLIC_DECRYPT) { + return sp_RsaPublic_3072_nb(&key->nb->sp_ctx, in, inLen, + &key->e, &key->n, out, outLen); + } + #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) + return sp_RsaPrivate_3072_nb(&key->nb->sp_ctx, in, inLen, + &key->d, &key->n, out, outLen); + #endif + } +#endif +#ifdef WOLFSSL_SP_4096 + if (bits == 4096) { + if (type == RSA_PUBLIC_ENCRYPT || type == RSA_PUBLIC_DECRYPT) { + return sp_RsaPublic_4096_nb(&key->nb->sp_ctx, in, inLen, + &key->e, &key->n, out, outLen); + } + #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) + return 
sp_RsaPrivate_4096_nb(&key->nb->sp_ctx, in, inLen, + &key->d, &key->n, out, outLen); + #endif + } +#endif +#endif /* SP nonblock RSA */ + +#ifdef USE_FAST_MATH if (key->nb->exptmod.state == TFM_EXPTMOD_NB_INIT) { if (mp_init(&key->nb->tmp) != MP_OKAY) { ret = MP_INIT_E; @@ -2357,6 +2408,18 @@ static int wc_RsaFunctionNonBlock(const byte* in, word32 inLen, byte* out, } mp_clear(&key->nb->tmp); +#else + /* No non-blocking backend available for this build. The SP non-block + * dispatch above only matches enabled key sizes; if we reach this + * point the key is not 2048/3072/4096 (or SP RSA itself isn't built) + * and TFM fastmath isn't compiled in either. */ + (void)in; + (void)inLen; + (void)out; + (void)outLen; + (void)type; + ret = NOT_COMPILED_IN; +#endif /* USE_FAST_MATH */ return ret; } @@ -3142,6 +3205,18 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out, } #endif /* WOLFSSL_ASYNC_CRYPT_SW */ +#ifdef WC_RSA_NONBLOCK + /* When a non-blocking context is attached and the SP nonblock backend + * is available, drive the chunked state machine here. wc_AsyncSimulate + * (line "if (ret == MP_WOULDBLOCK) ret = WC_PENDING_E;" at the bottom + * of the SW switch in wolfcrypt/src/async.c) translates per-yield + * MP_WOULDBLOCK into WC_PENDING_E so the TLS / async event loop can + * drive the operation to completion. 
*/ + if (key->nb != NULL) { + return wc_RsaFunctionNonBlock(in, inLen, out, outLen, type, key); + } +#endif + switch (type) { #ifndef WOLFSSL_RSA_PUBLIC_ONLY case RSA_PRIVATE_DECRYPT: @@ -3507,12 +3582,19 @@ static int wc_RsaFunction_ex(const byte* in, word32 inLen, byte* out, #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && key->n.raw.len > 0) { + /* wc_RsaFunctionAsync dispatches to the SP nonblock state machine + * in its compute path when key->nb is attached - wc_AsyncSimulate + * translates per-yield MP_WOULDBLOCK into WC_PENDING_E so the + * TLS / async event loop can drive completion. */ ret = wc_RsaFunctionAsync(in, inLen, out, outLen, type, key, rng); } else #endif #ifdef WC_RSA_NONBLOCK if (key->nb) { + /* Direct (non-async) nonblock dispatch - the caller (e.g. wolfcrypt + * test) drives the loop on MP_WOULDBLOCK directly. Reached when no + * async marker is set on the key. */ ret = wc_RsaFunctionNonBlock(in, inLen, out, outLen, type, key); } else @@ -5692,7 +5774,7 @@ int wc_RsaSetNonBlock(RsaKey* key, RsaNb* nb) return 0; } -#ifdef WC_RSA_NONBLOCK_TIME +#if defined(WC_RSA_NONBLOCK_TIME) && defined(USE_FAST_MATH) int wc_RsaSetNonBlockTime(RsaKey* key, word32 maxBlockUs, word32 cpuMHz) { if (key == NULL || key->nb == NULL) { @@ -5704,7 +5786,7 @@ int wc_RsaSetNonBlockTime(RsaKey* key, word32 maxBlockUs, word32 cpuMHz) return 0; } -#endif /* WC_RSA_NONBLOCK_TIME */ +#endif /* WC_RSA_NONBLOCK_TIME && USE_FAST_MATH */ #endif /* WC_RSA_NONBLOCK */ #ifndef WOLFSSL_RSA_PUBLIC_ONLY diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 16889893c91..2bbaa724f49 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -3510,6 +3510,141 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, return err; #endif } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular 
exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. + */ +typedef struct sp_2048_mod_exp_72_ctx { + int state; + sp_digit td[3 * 144]; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp; + sp_digit n; + int i; + int c; + byte y; + int reduceA; + int bits; +} sp_2048_mod_exp_72_ctx; + +static int sp_2048_mod_exp_72_nb(sp_2048_mod_exp_72_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 72 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 72U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_2048_mont_setup(m, &ctx->mp); + sp_2048_mont_norm_72(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_2048_mod_72(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 72U); + } + ctx->state = 2; + break; + case 2: /* NORM_MUL: t[1] = t[1] * norm */ + sp_2048_mul_72(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. */ + err = sp_2048_mod_72(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 29 + 1 keeps the shift in [1, 29]. 
*/ + ctx->i = (ctx->bits - 1) / 29; + ctx->c = ((ctx->bits - 1) % 29) + 1; + ctx->n = e[ctx->i--] << (29 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 29; + } + ctx->y = (byte)((ctx->n >> 28) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_2048_mont_mul_72(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 72 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_2048_mont_sqr_72(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 72 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_2048_mont_reduce_72(ctx->t[0], m, ctx->mp); + ctx->n = sp_2048_cmp_72(ctx->t[0], m); + sp_2048_cond_sub_72(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 31)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 72 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx->td, sizeof(ctx->td)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ @@ -3715,6 +3850,105 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, #endif /* WOLFSSL_SP_SMALL */ 
} +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_2048_RsaPublic_nb_ctx { + int state; + sp_2048_mod_exp_72_ctx mod_exp_ctx; + sp_digit a[72 * 2]; + sp_digit m[72]; + sp_digit e[3]; + int e_bits; +} sp_2048_RsaPublic_nb_ctx; + +/* Non-blocking RSA public key operation. State machine driven by sp_ctx; + * each call advances either an input/output conversion step or one + * sub-state of the inner modular exponentiation, returning MP_WOULDBLOCK + * until the operation completes. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E when outLen is too small, or MP_READ_E on input size errors. + */ +int sp_RsaPublic_2048_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* em, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_2048_RsaPublic_nb_ctx* ctx = + (sp_2048_RsaPublic_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_2048_RsaPublic_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate, convert inputs to sp_digit form */ + if (*outLen < 256U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(em) > 64) { + err = MP_READ_E; + break; + } + if (inLen > 256U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + ctx->e_bits = mp_count_bits(em); + if (ctx->e_bits == 0) { + err = MP_EXPTMOD_E; + break; + } + sp_2048_from_bin(ctx->a, 72, in, inLen); + sp_2048_from_mp(ctx->m, 72, mm); + sp_2048_from_mp(ctx->e, 3, em); + ctx->state = 1; + break; + case 1: /* MODEXP: drive inner mod_exp state machine one step */ + err = sp_2048_mod_exp_72_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->e, ctx->e_bits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result */ + sp_2048_to_bin_72(ctx->a, out); + *outLen = 256; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ + #ifndef WOLFSSL_RSA_PUBLIC_ONLY #if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) #endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ @@ -4009,6 +4243,100 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) +typedef struct sp_2048_RsaPrivate_nb_ctx { + int state; + sp_2048_mod_exp_72_ctx mod_exp_ctx; + sp_digit a[72 * 2]; + sp_digit m[72]; + sp_digit d[72]; +} sp_2048_RsaPrivate_nb_ctx; + +/* Non-blocking RSA private key operation - 
D-only path. + * The CRT path is not supported in non-blocking mode; configure with + * RSA_LOW_MEM or SP_RSA_PRIVATE_EXP_D to enable this entry point. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E when outLen is too small, or MP_READ_E on input size errors. + */ +int sp_RsaPrivate_2048_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* dm, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_2048_RsaPrivate_nb_ctx* ctx = + (sp_2048_RsaPrivate_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_2048_RsaPrivate_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (*outLen < 256U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + break; + } + if (inLen > 256U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + sp_2048_from_bin(ctx->a, 72, in, inLen); + sp_2048_from_mp(ctx->d, 72, dm); + sp_2048_from_mp(ctx->m, 72, mm); + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_2048_mod_exp_72_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->d, 2048, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN */ + sp_2048_to_bin_72(ctx->a, out); + *outLen = 256; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP && + * (SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM) */ + #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ @@ -4180,6 +4508,91 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif } +#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_2048_ModExp_nb_ctx { + int state; + sp_2048_mod_exp_72_ctx mod_exp_ctx; + sp_digit b[72 * 2]; + sp_digit e[72]; + sp_digit m[72]; + int expBits; +} sp_2048_ModExp_nb_ctx; + +/* Non-blocking modular exponentiation for Diffie-Hellman (mp_int form). + * Drives sp_2048_mod_exp_72_nb one sub-state per call. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base. MP integer. + * exp Exponent. MP integer. 
+ * mod Modulus. MP integer. + * res Result. MP integer. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_READ_E on input size errors, or MP_VAL when modulus is even. + */ +int sp_ModExp_2048_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res) +{ + int err = MP_WOULDBLOCK; + sp_2048_ModExp_nb_ctx* ctx = + (sp_2048_ModExp_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_2048_ModExp_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + break; + } + ctx->expBits = mp_count_bits(exp); + if (ctx->expBits > 2048) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_2048_from_mp(ctx->b, 72, base); + sp_2048_from_mp(ctx->e, 72, exp); + sp_2048_from_mp(ctx->m, 72, mod); + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_2048_mod_exp_72_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expBits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_MP */ + err = sp_2048_to_mp(ctx->b, res); + ctx->state = 3; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && + * !WOLFSSL_SP_FAST_MODEXP */ + #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 @@ -4525,6 +4938,105 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, } #endif /* WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_DH +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_2048_DhExp_nb_ctx { + int state; + sp_2048_mod_exp_72_ctx mod_exp_ctx; + 
sp_digit b[72 * 2]; + sp_digit e[72]; + sp_digit m[72]; + word32 expLen; +} sp_2048_DhExp_nb_ctx; + +/* Non-blocking Diffie-Hellman modular exponentiation. + * Computes base^exp mod mod where base and exp are byte strings; suitable + * for the TLS path where otherPub is already a byte buffer. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base bytes (other party's public key). + * baseSz Length, in bytes, of base (max 256). + * exp Exponent bytes (our private key). + * expLen Length, in bytes, of exp (max 256). + * mod Modulus. MP integer (must remain valid until first call returns). + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion. + */ +int sp_DhExp_2048_nb(sp_dh_ctx_t* sp_ctx, const byte* base, word32 baseSz, + const byte* exp, word32 expLen, const mp_int* mod, byte* out, + word32* outLen) +{ + int err = MP_WOULDBLOCK; + word32 i; + sp_2048_DhExp_nb_ctx* ctx = + (sp_2048_DhExp_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_2048_DhExp_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (baseSz > 256U) { + err = MP_READ_E; + break; + } + if (expLen > 256U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_2048_from_bin(ctx->b, 72, base, baseSz); + sp_2048_from_bin(ctx->e, 72, exp, expLen); + sp_2048_from_mp(ctx->m, 72, mod); + ctx->expLen = expLen; + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_2048_mod_exp_72_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expLen * 8U, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN */ + sp_2048_to_bin_72(ctx->b, out); + *outLen = 256; + for (i = 0; i < 256U && out[i] == 0U; i++) { + /* skip leading zero bytes */ + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ +#endif /* WOLFSSL_HAVE_SP_DH */ + /* Perform the modular exponentiation for Diffie-Hellman. * * base Base. MP integer. @@ -6617,6 +7129,141 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e return err; #endif } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. 
+ */ +typedef struct sp_3072_mod_exp_106_ctx { + int state; /* current step, 0 (INIT) to 11 (done) */ + sp_digit td[3 * 212]; /* backing store for t[0], t[1], t[2] */ + sp_digit* t[3]; /* working values; point into td */ + sp_digit* norm; /* alias of t[0] */ + sp_digit mp; /* set by sp_3072_mont_setup */ + sp_digit n; /* exponent limb being shifted out bit by bit */ + int i; /* index of the next exponent limb to load */ + int c; /* bits left to consume from n */ + byte y; /* exponent bit being processed */ + int reduceA; /* reduceA argument saved by INIT */ + int bits; /* bits argument saved by INIT */ +} sp_3072_mod_exp_106_ctx; + /* Run one step of the exponentiation whose progress is held in ctx. + * ctx->t[] and ctx->norm are absolute pointers into ctx->td, so ctx must + * not be copied or moved between calls. + * + * ctx State; ctx->state must be 0 on the first call. + * r Result, 106 * 2 digits; written by the final step only. + * a Base; read by the REDUCE_A step only. + * e Exponent digits; read by the BIT_INIT and BIT_NEXT steps. + * bits Number of exponent bits; saved by INIT. + * m Modulus; read by INIT and most later steps. + * reduceA Non-zero to reduce a modulo m first; saved by INIT. + * returns MP_WOULDBLOCK until the final step, which returns MP_OKAY; + * MP_VAL when bits is 0; or the error from sp_3072_mod_106. + */ +static int sp_3072_mod_exp_106_nb(sp_3072_mod_exp_106_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 106 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 106U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_3072_mont_setup(m, &ctx->mp); + sp_3072_mont_norm_106(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_3072_mod_106(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 106U); + } + ctx->state = 2; + break; + case 2: /* NORM_MUL: t[1] = t[1] * norm */ + sp_3072_mul_106(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. */ + err = sp_3072_mod_106(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 29 + 1 stays in [1, 29], so the shift 29 - c + is in [0, 28]. 
*/ + ctx->i = (ctx->bits - 1) / 29; + ctx->c = ((ctx->bits - 1) % 29) + 1; + ctx->n = e[ctx->i--] << (29 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 29; + } + ctx->y = (byte)((ctx->n >> 28) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_3072_mont_mul_106(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 106 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_3072_mont_sqr_106(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 106 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_3072_mont_reduce_106(ctx->t[0], m, ctx->mp); + ctx->n = sp_3072_cmp_106(ctx->t[0], m); + sp_3072_cond_sub_106(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 31)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 106 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { /* an intermediate step returned MP_OKAY; more steps remain */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: only td is wiped; state and the other fields are left as they are */ + ForceZero(ctx->td, sizeof(ctx->td)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_RSA @@ -6814,11 +7461,110 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, 
*outLen = 384; } - SP_FREE_VAR(d, NULL, DYNAMIC_TYPE_RSA); - + SP_FREE_VAR(d, NULL, DYNAMIC_TYPE_RSA); + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_3072_RsaPublic_nb_ctx { + int state; /* 0 INIT, 1 MODEXP, 2 TO_BIN */ + sp_3072_mod_exp_106_ctx mod_exp_ctx; /* inner state machine */ + sp_digit a[106 * 2]; /* base on input, result on output */ + sp_digit m[106]; /* modulus */ + sp_digit e[3]; /* public exponent */ + int e_bits; /* bit length of the public exponent */ +} sp_3072_RsaPublic_nb_ctx; + +/* Non-blocking RSA public key operation. State machine driven by sp_ctx; + * each call advances either an input/output conversion step or one + * sub-state of the inner modular exponentiation, returning MP_WOULDBLOCK + * until the operation completes. + * in, em, mm and the incoming *outLen are read on the first call only; the + * context is wiped whenever a value other than MP_WOULDBLOCK is returned. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E when outLen is too small, MP_READ_E on input size errors + * (exponent over 64 bits, inLen over 384, modulus not 3072 bits), + * MP_VAL when the modulus is even, or MP_EXPTMOD_E when + * mp_count_bits(em) is 0. + */ +int sp_RsaPublic_3072_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* em, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_3072_RsaPublic_nb_ctx* ctx = + (sp_3072_RsaPublic_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_3072_RsaPublic_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate, convert inputs to sp_digit form */ + if (*outLen < 384U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(em) > 64) { + err = MP_READ_E; + break; + } + if (inLen > 384U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + ctx->e_bits = mp_count_bits(em); + if (ctx->e_bits == 0) { + err = MP_EXPTMOD_E; + break; + } + sp_3072_from_bin(ctx->a, 106, in, inLen); + sp_3072_from_mp(ctx->m, 106, mm); + sp_3072_from_mp(ctx->e, 3, em); + ctx->state = 1; + break; + case 1: /* MODEXP: drive inner mod_exp state machine one step */ + err = sp_3072_mod_exp_106_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->e, ctx->e_bits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result */ + sp_3072_to_bin_106(ctx->a, out); + *outLen = 384; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet written out */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state so the context starts from 0 again */ + ForceZero(ctx, sizeof(*ctx)); + } return err; -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #ifndef WOLFSSL_RSA_PUBLIC_ONLY #if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) @@ -7114,6 +7860,100 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) +typedef struct sp_3072_RsaPrivate_nb_ctx { + int state; + sp_3072_mod_exp_106_ctx mod_exp_ctx; + sp_digit a[106 * 2]; + sp_digit m[106]; + sp_digit d[106]; +} sp_3072_RsaPrivate_nb_ctx; + +/* Non-blocking RSA private key operation - D-only path. 
+ * The CRT path is not supported in non-blocking mode; configure with + * RSA_LOW_MEM or SP_RSA_PRIVATE_EXP_D to enable this entry point. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E when outLen is too small, or MP_READ_E on input size errors. + */ +int sp_RsaPrivate_3072_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* dm, const mp_int* mm, byte* out, word32* outLen) +{ /* Also returns MP_VAL when mm is even. in, dm, mm and the incoming *outLen are read on the first call only. */ + int err = MP_WOULDBLOCK; + sp_3072_RsaPrivate_nb_ctx* ctx = + (sp_3072_RsaPrivate_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_3072_RsaPrivate_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate, convert inputs to sp_digit form */ + if (*outLen < 384U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + break; + } + if (inLen > 384U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + sp_3072_from_bin(ctx->a, 106, in, inLen); + sp_3072_from_mp(ctx->d, 106, dm); + sp_3072_from_mp(ctx->m, 106, mm); + ctx->state = 1; + break; + case 1: /* MODEXP: one inner step; the bit count is fixed at 3072 whatever the size of dm */ + err = sp_3072_mod_exp_106_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->d, 3072, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result */ + sp_3072_to_bin_106(ctx->a, out); + *outLen = 384; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet written out */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state, including the private exponent copy */ + ForceZero(ctx, sizeof(*ctx)); 
+ } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP && + * (SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM) */ + #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ @@ -7285,6 +8125,91 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif } +#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_3072_ModExp_nb_ctx { + int state; /* 0 INIT, 1 MODEXP, 2 TO_MP */ + sp_3072_mod_exp_106_ctx mod_exp_ctx; /* inner state machine */ + sp_digit b[106 * 2]; /* base on input, result on output */ + sp_digit e[106]; /* exponent */ + sp_digit m[106]; /* modulus */ + int expBits; /* mp_count_bits(exp) from the first call */ +} sp_3072_ModExp_nb_ctx; + +/* Non-blocking modular exponentiation for Diffie-Hellman (mp_int form). + * Drives sp_3072_mod_exp_106_nb one sub-state per call. + * base, exp and mod are read on the first call only; the context is wiped + * whenever a value other than MP_WOULDBLOCK is returned. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_READ_E on input size errors, or MP_VAL when modulus is even + * or mp_count_bits(exp) is 0; otherwise the error from + * sp_3072_to_mp. + */ +int sp_ModExp_3072_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res) +{ + int err = MP_WOULDBLOCK; + sp_3072_ModExp_nb_ctx* ctx = + (sp_3072_ModExp_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_3072_ModExp_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate sizes, convert inputs to sp_digit form */ + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + break; + } + ctx->expBits = mp_count_bits(exp); + if (ctx->expBits > 3072) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_3072_from_mp(ctx->b, 106, base); + sp_3072_from_mp(ctx->e, 106, exp); + sp_3072_from_mp(ctx->m, 106, mod); + ctx->state = 1; + break; + case 1: /* MODEXP: drive inner mod_exp state machine one step */ + err = sp_3072_mod_exp_106_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expBits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_MP: convert the result into res; the conversion status is returned as is */ + err = sp_3072_to_mp(ctx->b, res); + ctx->state = 3; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet converted */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state so the context starts from 0 again */ + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && + * !WOLFSSL_SP_FAST_MODEXP */ + #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 @@ -7480,6 +8405,105 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, } #endif /* WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_DH +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_3072_DhExp_nb_ctx { + int state; + sp_3072_mod_exp_106_ctx mod_exp_ctx; + sp_digit b[106 * 2]; + sp_digit e[106]; + sp_digit m[106]; + word32 expLen; +} sp_3072_DhExp_nb_ctx; + +/* Non-blocking Diffie-Hellman modular exponentiation. + * Computes base^exp mod mod where base and exp are byte strings; suitable + * for the TLS path where otherPub is already a byte buffer. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base bytes (other party's public key). + * baseSz Length, in bytes, of base (max 384). 
+ * exp Exponent bytes (our private key). + * expLen Length, in bytes, of exp (max 384). + * mod Modulus. MP integer (must remain valid until first call returns). + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion. + */ +int sp_DhExp_3072_nb(sp_dh_ctx_t* sp_ctx, const byte* base, word32 baseSz, + const byte* exp, word32 expLen, const mp_int* mod, byte* out, + word32* outLen) +{ /* Error returns: MP_READ_E when baseSz or expLen exceeds 384 or mod is not 3072 bits, MP_VAL when mod is even or expLen is 0. */ + int err = MP_WOULDBLOCK; + word32 i; + sp_3072_DhExp_nb_ctx* ctx = + (sp_3072_DhExp_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_3072_DhExp_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate sizes, convert inputs to sp_digit form */ + if (baseSz > 384U) { + err = MP_READ_E; + break; + } + if (expLen > 384U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_3072_from_bin(ctx->b, 106, base, baseSz); + sp_3072_from_bin(ctx->e, 106, exp, expLen); + sp_3072_from_mp(ctx->m, 106, mod); + ctx->expLen = expLen; + ctx->state = 1; + break; + case 1: /* MODEXP: one inner step; the bit count is expLen * 8, and a count of 0 is rejected by the inner INIT step with MP_VAL */ + err = sp_3072_mod_exp_106_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expLen * 8U, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result, strip leading zero bytes */ + sp_3072_to_bin_106(ctx->b, out); + *outLen = 384; + for (i = 0; i < 384U && out[i] == 0U; i++) { + /* skip leading zero bytes */ + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet written out */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state so the context starts from 0 again */ + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ +#endif /* 
WOLFSSL_HAVE_SP_DH */ + /* Perform the modular exponentiation for Diffie-Hellman. * * base Base. MP integer. @@ -10380,6 +11404,141 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e return err; #endif } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. + */ +typedef struct sp_3072_mod_exp_112_ctx { + int state; /* current step, 0 (INIT) to 11 (done) */ + sp_digit td[3 * 224]; /* backing store for t[0], t[1], t[2] */ + sp_digit* t[3]; /* working values; point into td */ + sp_digit* norm; /* alias of t[0] */ + sp_digit mp; /* set by sp_3072_mont_setup */ + sp_digit n; /* exponent limb being shifted out bit by bit */ + int i; /* index of the next exponent limb to load */ + int c; /* bits left to consume from n */ + byte y; /* exponent bit being processed */ + int reduceA; /* reduceA argument saved by INIT */ + int bits; /* bits argument saved by INIT */ +} sp_3072_mod_exp_112_ctx; + /* Run one step of the exponentiation whose progress is held in ctx. + * ctx->t[] and ctx->norm are absolute pointers into ctx->td, so ctx must + * not be copied or moved between calls. + * + * ctx State; ctx->state must be 0 on the first call. + * r Result, 112 * 2 digits; written by the final step only. + * a Base; read by the REDUCE_A step only. + * e Exponent digits; read by the BIT_INIT and BIT_NEXT steps. + * bits Number of exponent bits; saved by INIT. + * m Modulus; read by INIT and most later steps. + * reduceA Non-zero to reduce a modulo m first; saved by INIT. + * returns MP_WOULDBLOCK until the final step, which returns MP_OKAY; + * MP_VAL when bits is 0; or the error from sp_3072_mod_112. + */ +static int sp_3072_mod_exp_112_nb(sp_3072_mod_exp_112_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 112 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 112U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_3072_mont_setup(m, &ctx->mp); + sp_3072_mont_norm_112(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_3072_mod_112(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 112U); + } + ctx->state = 2; + break; + case 2: /* NORM_MUL: t[1] = t[1] * norm */ + sp_3072_mul_112(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. 
*/ + err = sp_3072_mod_112(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 28 + 1 stays in [1, 28], so the shift 28 - c + is in [0, 27]. */ + ctx->i = (ctx->bits - 1) / 28; + ctx->c = ((ctx->bits - 1) % 28) + 1; + ctx->n = e[ctx->i--] << (28 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 28; + } + ctx->y = (byte)((ctx->n >> 27) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_3072_mont_mul_112(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 112 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_3072_mont_sqr_112(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 112 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_3072_mont_reduce_112(ctx->t[0], m, ctx->mp); + ctx->n = sp_3072_cmp_112(ctx->t[0], m); + sp_3072_cond_sub_112(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 31)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 112 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { /* an intermediate step returned MP_OKAY; more steps remain */ + err = MP_WOULDBLOCK; + } + if (err 
!= MP_WOULDBLOCK) { /* finished or failed: only td is wiped; state and the other fields are left as they are */ + ForceZero(ctx->td, sizeof(ctx->td)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ @@ -13564,11 +14723,146 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e XMEMCPY(r, rt, sizeof(sp_digit) * 284); } - SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + return err; +#endif +} +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. + */ +typedef struct sp_4096_mod_exp_142_ctx { + int state; /* current step, 0 (INIT) to 11 (done) */ + sp_digit td[3 * 284]; /* backing store for t[0], t[1], t[2] */ + sp_digit* t[3]; /* working values; point into td */ + sp_digit* norm; /* alias of t[0] */ + sp_digit mp; /* set by sp_4096_mont_setup */ + sp_digit n; /* exponent limb being shifted out bit by bit */ + int i; /* index of the next exponent limb to load */ + int c; /* bits left to consume from n */ + byte y; /* exponent bit being processed */ + int reduceA; /* reduceA argument saved by INIT */ + int bits; /* bits argument saved by INIT */ +} sp_4096_mod_exp_142_ctx; + /* Run one step of the exponentiation whose progress is held in ctx. + * ctx->t[] and ctx->norm are absolute pointers into ctx->td, so ctx must + * not be copied or moved between calls. + * + * ctx State; ctx->state must be 0 on the first call. + * r Result, 142 * 2 digits; written by the final step only. + * a Base; read by the REDUCE_A step only. + * e Exponent digits; read by the BIT_INIT and BIT_NEXT steps. + * bits Number of exponent bits; saved by INIT. + * m Modulus; read by INIT and most later steps. + * reduceA Non-zero to reduce a modulo m first; saved by INIT. + * returns MP_WOULDBLOCK until the final step, which returns MP_OKAY; + * MP_VAL when bits is 0; or the error from sp_4096_mod_142. + */ +static int sp_4096_mod_exp_142_nb(sp_4096_mod_exp_142_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 142 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 142U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_4096_mont_setup(m, &ctx->mp); + sp_4096_mont_norm_142(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_4096_mod_142(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 142U); + } + ctx->state = 2; + break; + case 2: /* 
NORM_MUL: t[1] = t[1] * norm */ + sp_4096_mul_142(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. */ + err = sp_4096_mod_142(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 29 + 1 stays in [1, 29], so the shift 29 - c + is in [0, 28]. */ + ctx->i = (ctx->bits - 1) / 29; + ctx->c = ((ctx->bits - 1) % 29) + 1; + ctx->n = e[ctx->i--] << (29 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 29; + } + ctx->y = (byte)((ctx->n >> 28) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_4096_mont_mul_142(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 142 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_4096_mont_sqr_142(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 142 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_4096_mont_reduce_142(ctx->t[0], m, ctx->mp); + ctx->n = 
sp_4096_cmp_142(ctx->t[0], m); + sp_4096_cond_sub_142(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 31)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 142 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { /* an intermediate step returned MP_OKAY; more steps remain */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: only td is wiped; state and the other fields are left as they are */ + ForceZero(ctx->td, sizeof(ctx->td)); + } return err; -#endif } +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_RSA @@ -13772,6 +15066,105 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, #endif /* WOLFSSL_SP_SMALL */ } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_4096_RsaPublic_nb_ctx { + int state; + sp_4096_mod_exp_142_ctx mod_exp_ctx; + sp_digit a[142 * 2]; + sp_digit m[142]; + sp_digit e[3]; + int e_bits; +} sp_4096_RsaPublic_nb_ctx; + +/* Non-blocking RSA public key operation. State machine driven by sp_ctx; + * each call advances either an input/output conversion step or one + * sub-state of the inner modular exponentiation, returning MP_WOULDBLOCK + * until the operation completes. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E when outLen is too small, or MP_READ_E on input size errors. 
+ */ +int sp_RsaPublic_4096_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* em, const mp_int* mm, byte* out, word32* outLen) +{ /* Also returns MP_VAL when mm is even and MP_EXPTMOD_E when mp_count_bits(em) is 0. in, em, mm and the incoming *outLen are read on the first call only. */ + int err = MP_WOULDBLOCK; + sp_4096_RsaPublic_nb_ctx* ctx = + (sp_4096_RsaPublic_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_4096_RsaPublic_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate, convert inputs to sp_digit form */ + if (*outLen < 512U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(em) > 64) { + err = MP_READ_E; + break; + } + if (inLen > 512U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + ctx->e_bits = mp_count_bits(em); + if (ctx->e_bits == 0) { + err = MP_EXPTMOD_E; + break; + } + sp_4096_from_bin(ctx->a, 142, in, inLen); + sp_4096_from_mp(ctx->m, 142, mm); + sp_4096_from_mp(ctx->e, 3, em); + ctx->state = 1; + break; + case 1: /* MODEXP: drive inner mod_exp state machine one step */ + err = sp_4096_mod_exp_142_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->e, ctx->e_bits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result */ + sp_4096_to_bin_142(ctx->a, out); + *outLen = 512; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet written out */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state so the context starts from 0 again */ + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ + #ifndef WOLFSSL_RSA_PUBLIC_ONLY #if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) #endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ @@ -14066,6 +15459,100 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#if defined(WOLFSSL_SP_NONBLOCK) && 
defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) +typedef struct sp_4096_RsaPrivate_nb_ctx { + int state; /* 0 INIT, 1 MODEXP, 2 TO_BIN */ + sp_4096_mod_exp_142_ctx mod_exp_ctx; /* inner state machine */ + sp_digit a[142 * 2]; /* base on input, result on output */ + sp_digit m[142]; /* modulus */ + sp_digit d[142]; /* private exponent */ +} sp_4096_RsaPrivate_nb_ctx; + +/* Non-blocking RSA private key operation - D-only path. + * The CRT path is not supported in non-blocking mode; configure with + * RSA_LOW_MEM or SP_RSA_PRIVATE_EXP_D to enable this entry point. + * in, dm, mm and the incoming *outLen are read on the first call only; the + * context is wiped whenever a value other than MP_WOULDBLOCK is returned. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E when outLen is too small, MP_READ_E on input size errors + * (dm over 4096 bits, inLen over 512, modulus not 4096 bits), or + * MP_VAL when the modulus is even. + */ +int sp_RsaPrivate_4096_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* dm, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_4096_RsaPrivate_nb_ctx* ctx = + (sp_4096_RsaPrivate_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_4096_RsaPrivate_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate, convert inputs to sp_digit form */ + if (*outLen < 512U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + break; + } + if (inLen > 512U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + sp_4096_from_bin(ctx->a, 142, in, inLen); + sp_4096_from_mp(ctx->d, 142, dm); + sp_4096_from_mp(ctx->m, 142, mm); + ctx->state = 1; + break; + case 1: /* MODEXP: one inner step; the bit count is fixed at 4096 whatever the size of dm */ + err = sp_4096_mod_exp_142_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->d, 4096, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result */ + sp_4096_to_bin_142(ctx->a, out); + *outLen = 512; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet written out */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state, including the private exponent copy */ + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP && + * (SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM) */ + #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ @@ -14237,6 +15724,91 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif } +#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_4096_ModExp_nb_ctx { + int state; + sp_4096_mod_exp_142_ctx mod_exp_ctx; + sp_digit b[142 * 2]; + sp_digit e[142]; + sp_digit m[142]; + int expBits; +} sp_4096_ModExp_nb_ctx; + +/* Non-blocking modular exponentiation for Diffie-Hellman (mp_int form). + * Drives sp_4096_mod_exp_142_nb one sub-state per call. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base. MP integer. + * exp Exponent. 
MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_READ_E on input size errors, or MP_VAL when modulus is even. + */ +int sp_ModExp_4096_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res) +{ /* Also returns MP_VAL when mp_count_bits(exp) is 0, and the error from sp_4096_to_mp. base, exp and mod are read on the first call only. */ + int err = MP_WOULDBLOCK; + sp_4096_ModExp_nb_ctx* ctx = + (sp_4096_ModExp_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_4096_ModExp_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate sizes, convert inputs to sp_digit form */ + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + break; + } + ctx->expBits = mp_count_bits(exp); + if (ctx->expBits > 4096) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_4096_from_mp(ctx->b, 142, base); + sp_4096_from_mp(ctx->e, 142, exp); + sp_4096_from_mp(ctx->m, 142, mod); + ctx->state = 1; + break; + case 1: /* MODEXP: drive inner mod_exp state machine one step */ + err = sp_4096_mod_exp_142_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expBits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_MP: convert the result into res; the conversion status is returned as is */ + err = sp_4096_to_mp(ctx->b, res); + ctx->state = 3; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet converted */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state so the context starts from 0 again */ + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && + * !WOLFSSL_SP_FAST_MODEXP */ + #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 @@ -14432,6 +16004,105 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, } #endif /* WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_DH +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_4096_DhExp_nb_ctx { + int state; + 
sp_4096_mod_exp_142_ctx mod_exp_ctx; + sp_digit b[142 * 2]; + sp_digit e[142]; + sp_digit m[142]; + word32 expLen; +} sp_4096_DhExp_nb_ctx; + +/* Non-blocking Diffie-Hellman modular exponentiation. + * Computes base^exp mod mod where base and exp are byte strings; suitable + * for the TLS path where otherPub is already a byte buffer. + * base, exp and mod are read on the first call only; the context is wiped + * whenever a value other than MP_WOULDBLOCK is returned. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base bytes (other party's public key). + * baseSz Length, in bytes, of base (max 512). + * exp Exponent bytes (our private key). + * expLen Length, in bytes, of exp (max 512). + * mod Modulus. MP integer (must remain valid until first call returns). + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result (leading zero bytes + * are stripped). + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_READ_E when baseSz or expLen exceeds 512 or mod is not + * 4096 bits, or MP_VAL when mod is even or expLen is 0. + */ +int sp_DhExp_4096_nb(sp_dh_ctx_t* sp_ctx, const byte* base, word32 baseSz, + const byte* exp, word32 expLen, const mp_int* mod, byte* out, + word32* outLen) +{ + int err = MP_WOULDBLOCK; + word32 i; + sp_4096_DhExp_nb_ctx* ctx = + (sp_4096_DhExp_nb_ctx*)sp_ctx->data; + /* Compile-time guard: the array size is -1 unless the state struct is + * smaller than *sp_ctx. */ + typedef char ctx_size_test[sizeof(sp_4096_DhExp_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate sizes, convert inputs to sp_digit form */ + if (baseSz > 512U) { + err = MP_READ_E; + break; + } + if (expLen > 512U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_4096_from_bin(ctx->b, 142, base, baseSz); + sp_4096_from_bin(ctx->e, 142, exp, expLen); + sp_4096_from_mp(ctx->m, 142, mod); + ctx->expLen = expLen; + ctx->state = 1; + break; + case 1: /* MODEXP: one inner step; the bit count is expLen * 8, and a count of 0 is rejected by the inner INIT step with MP_VAL */ + err = sp_4096_mod_exp_142_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expLen * 8U, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result, strip leading zero bytes */ + sp_4096_to_bin_142(ctx->b, out); + *outLen = 512; + for (i = 0; i < 512U && out[i] == 0U; i++) { + /* skip leading zero bytes */ + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { /* exponentiation done, result not yet written out */ + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { /* finished or failed: wipe all state so the context starts from 0 again */ + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ +#endif /* WOLFSSL_HAVE_SP_DH */ + #endif /* WOLFSSL_HAVE_SP_DH | (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) */ #else @@ -17240,6 +18911,141 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e return err; #endif } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. 
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* Persistent state for sp_4096_mod_exp_162_nb(). Must be all zero before the
 * first call. t[] and norm point into td, so the structure must not be
 * copied or moved while an operation is in progress.
 */
typedef struct sp_4096_mod_exp_162_ctx {
    int state;              /* Next step to run; 0 starts an operation. */
    sp_digit td[3 * 324];   /* Backing store for the three temporaries. */
    sp_digit* t[3];         /* t[0], t[1]: working values; t[2]: scratch. */
    sp_digit* norm;         /* Alias of t[0]. */
    sp_digit mp;            /* Value produced by sp_4096_mont_setup(). */
    sp_digit n;             /* Exponent limb currently being consumed. */
    int i;                  /* Index of the next exponent limb to load. */
    int c;                  /* Bits of n still to be processed. */
    byte y;                 /* Exponent bit currently being processed. */
    int reduceA;            /* reduceA as passed to the first call. */
    int bits;               /* bits as passed to the first call. */
} sp_4096_mod_exp_162_ctx;

/* Non-blocking modular exponentiation: r = a^e mod m.
 * State machine driven by ctx; each call advances one Montgomery op, one
 * copy or one bit-extract step, then returns MP_WOULDBLOCK until the final
 * reduction completes.
 *
 * ctx      Persistent state; must be all zero on the first call.
 * r        Result. Written by the final step only (2 * 162 digits).
 * a        Base. Read by the step that follows setup.
 * e        Exponent. Read throughout the bit loop.
 * bits     Number of bits in the exponent. Captured on the first call.
 * m        Modulus. Read by every arithmetic step.
 * reduceA  Non-zero to reduce a modulo m first. Captured on the first call.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_VAL when bits is 0 or the state is not recognised, or the
 *          error returned by sp_4096_mod_162().
 */
static int sp_4096_mod_exp_162_nb(sp_4096_mod_exp_162_ctx* ctx,
    sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
    const sp_digit* m, int reduceA)
{
    int err = MP_WOULDBLOCK;
    int j;

    switch (ctx->state) {
    case 0: /* INIT: layout td, mont_setup, mont_norm */
        if (bits == 0) {
            err = MP_VAL;
            break;
        }
        for (j = 0; j < 3; j++) {
            ctx->t[j] = ctx->td + (j * 162 * 2);
            XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 162U * 2U);
        }
        ctx->norm = ctx->t[0];
        ctx->mp = 1;
        ctx->reduceA = reduceA;
        ctx->bits = bits;
        sp_4096_mont_setup(m, &ctx->mp);
        sp_4096_mont_norm_162(ctx->norm, m);
        ctx->state = 1;
        break;
    case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */
        if (ctx->reduceA != 0) {
            err = sp_4096_mod_162(ctx->t[1], a, m);
            if (err != MP_OKAY) {
                break;
            }
        }
        else {
            XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 162U);
        }
        ctx->state = 2;
        break;
    case 2: /* NORM_MUL: t[1] = t[1] * norm */
        sp_4096_mul_162(ctx->t[1], ctx->t[1], ctx->norm);
        ctx->state = 3;
        break;
    case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm and is left
             * untouched as the accumulator for the bit loop. */
        err = sp_4096_mod_162(ctx->t[1], ctx->t[1], m);
        if (err != MP_OKAY) {
            break;
        }
        ctx->state = 4;
        break;
    case 4: /* BIT_INIT: start at the limb holding bit (bits - 1) so that e
             * is not read past its end when bits is an exact multiple of
             * the 26-bit limb width. c = ((bits - 1) % 26) + 1 is in
             * [1, 26], so the shift (26 - c) is in [0, 25]. */
        ctx->i = (ctx->bits - 1) / 26;
        ctx->c = ((ctx->bits - 1) % 26) + 1;
        ctx->n = e[ctx->i--] << (26 - ctx->c);
        ctx->state = 5;
        break;
    case 5: /* BIT_NEXT: refill on limb boundary, peel one exponent bit */
        if (ctx->c == 0) {
            if (ctx->i == -1) {
                /* Every exponent limb has been consumed. */
                ctx->state = 10;
                break;
            }
            ctx->n = e[ctx->i--];
            ctx->c = 26;
        }
        ctx->y = (byte)((ctx->n >> 25) & 1);
        ctx->n <<= 1;
        ctx->state = 6;
        break;
    case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */
        sp_4096_mont_mul_162(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1],
            m, ctx->mp);
        ctx->state = 7;
        break;
    case 7: /* COPY_OUT: t[y] -> t[2]. The source address is built with
             * addr_mask rather than by branching on the exponent bit. */
        XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                                   ((size_t)ctx->t[1] & addr_mask[ctx->y])),
                sizeof(sp_digit) * 162 * 2);
        ctx->state = 8;
        break;
    case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */
        sp_4096_mont_sqr_162(ctx->t[2], ctx->t[2], m, ctx->mp);
        ctx->state = 9;
        break;
    case 9: /* COPY_BACK: t[2] -> t[y], same masked addressing; advance bit */
        XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                        ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2],
                sizeof(sp_digit) * 162 * 2);
        ctx->c--;
        ctx->state = 5;
        break;
    case 10: /* REDUCE: final mont_reduce + cond_sub. n is reused to hold
              * the comparison result. */
        sp_4096_mont_reduce_162(ctx->t[0], m, ctx->mp);
        ctx->n = sp_4096_cmp_162(ctx->t[0], m);
        sp_4096_cond_sub_162(ctx->t[0], ctx->t[0], m,
            (sp_digit)~(ctx->n >> 31));
        XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 162 * 2);
        ctx->state = 11;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed, is being reused after completion or
              * has been corrupted - fail instead of reporting
              * MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 11) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the whole context, not just td, so the exponent
     * bits held in n and y do not outlive the operation. This also resets
     * state to 0. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* Persistent state for sp_2048_mod_exp_34_nb(). Must be all zero before the
 * first call. t[] and norm point into td, so the structure must not be
 * copied or moved while an operation is in progress.
 */
typedef struct sp_2048_mod_exp_34_ctx {
    int state;              /* Next step to run; 0 starts an operation. */
    sp_digit td[3 * 68];    /* Backing store for the three temporaries. */
    sp_digit* t[3];         /* t[0], t[1]: working values; t[2]: scratch. */
    sp_digit* norm;         /* Alias of t[0]. */
    sp_digit mp;            /* Value produced by sp_2048_mont_setup(). */
    sp_digit n;             /* Exponent limb currently being consumed. */
    int i;                  /* Index of the next exponent limb to load. */
    int c;                  /* Bits of n still to be processed. */
    byte y;                 /* Exponent bit currently being processed. */
    int reduceA;            /* reduceA as passed to the first call. */
    int bits;               /* bits as passed to the first call. */
} sp_2048_mod_exp_34_ctx;

/* Non-blocking modular exponentiation: r = a^e mod m.
 * State machine driven by ctx; each call advances one Montgomery op, one
 * copy or one bit-extract step, then returns MP_WOULDBLOCK until the final
 * reduction completes.
 *
 * ctx      Persistent state; must be all zero on the first call.
 * r        Result. Written by the final step only (2 * 34 digits).
 * a        Base. Read by the step that follows setup.
 * e        Exponent. Read throughout the bit loop.
 * bits     Number of bits in the exponent. Captured on the first call.
 * m        Modulus. Read by every arithmetic step.
 * reduceA  Non-zero to reduce a modulo m first. Captured on the first call.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_VAL when bits is 0 or the state is not recognised, or the
 *          error returned by sp_2048_mod_34().
 */
static int sp_2048_mod_exp_34_nb(sp_2048_mod_exp_34_ctx* ctx,
    sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
    const sp_digit* m, int reduceA)
{
    int err = MP_WOULDBLOCK;
    int j;

    switch (ctx->state) {
    case 0: /* INIT: layout td, mont_setup, mont_norm */
        if (bits == 0) {
            err = MP_VAL;
            break;
        }
        for (j = 0; j < 3; j++) {
            ctx->t[j] = ctx->td + (j * 34 * 2);
            XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 34U * 2U);
        }
        ctx->norm = ctx->t[0];
        ctx->mp = 1;
        ctx->reduceA = reduceA;
        ctx->bits = bits;
        sp_2048_mont_setup(m, &ctx->mp);
        sp_2048_mont_norm_34(ctx->norm, m);
        ctx->state = 1;
        break;
    case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */
        if (ctx->reduceA != 0) {
            err = sp_2048_mod_34(ctx->t[1], a, m);
            if (err != MP_OKAY) {
                break;
            }
        }
        else {
            XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 34U);
        }
        ctx->state = 2;
        break;
    case 2: /* NORM_MUL: t[1] = t[1] * norm */
        sp_2048_mul_34(ctx->t[1], ctx->t[1], ctx->norm);
        ctx->state = 3;
        break;
    case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm and is left
             * untouched as the accumulator for the bit loop. */
        err = sp_2048_mod_34(ctx->t[1], ctx->t[1], m);
        if (err != MP_OKAY) {
            break;
        }
        ctx->state = 4;
        break;
    case 4: /* BIT_INIT: start at the limb holding bit (bits - 1) so that e
             * is not read past its end when bits is an exact multiple of
             * the 61-bit limb width. c = ((bits - 1) % 61) + 1 is in
             * [1, 61], so the shift (61 - c) is in [0, 60]. */
        ctx->i = (ctx->bits - 1) / 61;
        ctx->c = ((ctx->bits - 1) % 61) + 1;
        ctx->n = e[ctx->i--] << (61 - ctx->c);
        ctx->state = 5;
        break;
    case 5: /* BIT_NEXT: refill on limb boundary, peel one exponent bit */
        if (ctx->c == 0) {
            if (ctx->i == -1) {
                /* Every exponent limb has been consumed. */
                ctx->state = 10;
                break;
            }
            ctx->n = e[ctx->i--];
            ctx->c = 61;
        }
        ctx->y = (byte)((ctx->n >> 60) & 1);
        ctx->n <<= 1;
        ctx->state = 6;
        break;
    case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */
        sp_2048_mont_mul_34(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1],
            m, ctx->mp);
        ctx->state = 7;
        break;
    case 7: /* COPY_OUT: t[y] -> t[2]. The source address is built with
             * addr_mask rather than by branching on the exponent bit. */
        XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                                   ((size_t)ctx->t[1] & addr_mask[ctx->y])),
                sizeof(sp_digit) * 34 * 2);
        ctx->state = 8;
        break;
    case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */
        sp_2048_mont_sqr_34(ctx->t[2], ctx->t[2], m, ctx->mp);
        ctx->state = 9;
        break;
    case 9: /* COPY_BACK: t[2] -> t[y], same masked addressing; advance bit */
        XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                        ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2],
                sizeof(sp_digit) * 34 * 2);
        ctx->c--;
        ctx->state = 5;
        break;
    case 10: /* REDUCE: final mont_reduce + cond_sub. n is reused to hold
              * the comparison result. */
        sp_2048_mont_reduce_34(ctx->t[0], m, ctx->mp);
        ctx->n = sp_2048_cmp_34(ctx->t[0], m);
        sp_2048_cond_sub_34(ctx->t[0], ctx->t[0], m,
            (sp_digit)~(ctx->n >> 63));
        XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 34 * 2);
        ctx->state = 11;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed, is being reused after completion or
              * has been corrupted - fail instead of reporting
              * MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 11) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the whole context, not just td, so the exponent
     * bits held in n and y do not outlive the operation. This also resets
     * state to 0. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* State for sp_RsaPublic_2048_nb(), overlaid on sp_ctx->data. */
typedef struct sp_2048_RsaPublic_nb_ctx {
    int state;                          /* Next step; 0 starts an operation. */
    sp_2048_mod_exp_34_ctx mod_exp_ctx; /* State of the inner exponentiation. */
    sp_digit a[34 * 2];                 /* Base on input, result on output. */
    sp_digit m[34];                     /* Modulus. */
    sp_digit e[2];                      /* Public exponent. */
    int e_bits;                         /* Number of bits in the exponent. */
} sp_2048_RsaPublic_nb_ctx;

/* Non-blocking RSA public key operation. State machine driven by sp_ctx;
 * each call advances either an input/output conversion step or one
 * sub-state of the inner modular exponentiation, returning MP_WOULDBLOCK
 * until the operation completes. in, em and mm are copied into the context
 * by the first call; every later call works on those copies.
 *
 * sp_ctx   Persistent state buffer; first call must have all bytes zero.
 * in       Array of bytes representing the number to exponentiate, base.
 * inLen    Number of bytes in base (max 256).
 * em       Public exponent (at most 64 bits, non-zero).
 * mm       Modulus (exactly 2048 bits, odd).
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 256 bytes long. Written by the last call only.
 * outLen   On the first call, the size of out. Set to 256 on completion.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_TO_E when outLen is too small, MP_READ_E on input size errors,
 *          MP_VAL when the modulus is even or the state is not recognised,
 *          MP_EXPTMOD_E when the exponent is zero, or an error from the
 *          inner exponentiation.
 */
int sp_RsaPublic_2048_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen,
    const mp_int* em, const mp_int* mm, byte* out, word32* outLen)
{
    int err = MP_WOULDBLOCK;
    sp_2048_RsaPublic_nb_ctx* ctx =
        (sp_2048_RsaPublic_nb_ctx*)sp_ctx->data;

    /* Compile-time check: the array size is negative, and compilation fails,
     * unless the state is smaller than *sp_ctx. */
    typedef char ctx_size_test[sizeof(sp_2048_RsaPublic_nb_ctx)
        >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT: validate, convert inputs to sp_digit form */
        if (*outLen < 256U) {
            err = MP_TO_E;
            break;
        }
        if (mp_count_bits(em) > 64) {
            err = MP_READ_E;
            break;
        }
        if (inLen > 256U) {
            err = MP_READ_E;
            break;
        }
        if (mp_count_bits(mm) != 2048) {
            err = MP_READ_E;
            break;
        }
        if (mp_iseven(mm)) {
            err = MP_VAL;
            break;
        }
        ctx->e_bits = mp_count_bits(em);
        if (ctx->e_bits == 0) {
            err = MP_EXPTMOD_E;
            break;
        }
        sp_2048_from_bin(ctx->a, 34, in, inLen);
        sp_2048_from_mp(ctx->m, 34, mm);
        sp_2048_from_mp(ctx->e, 2, em);
        ctx->state = 1;
        break;
    case 1: /* MODEXP: drive inner mod_exp state machine one step */
        err = sp_2048_mod_exp_34_nb(&ctx->mod_exp_ctx, ctx->a,
            ctx->a, ctx->e, ctx->e_bits, ctx->m, 0);
        if (err == MP_OKAY) {
            ctx->state = 2;
        }
        break;
    case 2: /* TO_BIN: write big-endian result */
        sp_2048_to_bin_34(ctx->a, out);
        *outLen = 256;
        ctx->state = 3;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed or has been corrupted - fail instead
              * of reporting MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 3) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the context, which also resets state to 0. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }
    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP) && \
    (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM))
/* State for sp_RsaPrivate_2048_nb(), overlaid on sp_ctx->data. */
typedef struct sp_2048_RsaPrivate_nb_ctx {
    int state;                          /* Next step; 0 starts an operation. */
    sp_2048_mod_exp_34_ctx mod_exp_ctx; /* State of the inner exponentiation. */
    sp_digit a[34 * 2];                 /* Base on input, result on output. */
    sp_digit m[34];                     /* Modulus. */
    sp_digit d[34];                     /* Private exponent. */
} sp_2048_RsaPrivate_nb_ctx;

/* Non-blocking RSA private key operation - D-only path.
 * The CRT path is not supported in non-blocking mode; configure with
 * RSA_LOW_MEM or SP_RSA_PRIVATE_EXP_D to enable this entry point.
 * in, dm and mm are copied into the context by the first call; every later
 * call works on those copies. The exponentiation always processes 2048
 * exponent bits, whatever the bit length of dm.
 *
 * sp_ctx   Persistent state buffer; first call must have all bytes zero.
 * in       Array of bytes representing the number to exponentiate, base.
 * inLen    Number of bytes in base (max 256).
 * dm       Private exponent (at most 2048 bits).
 * mm       Modulus (exactly 2048 bits, odd).
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 256 bytes long. Written by the last call only.
 * outLen   On the first call, the size of out. Set to 256 on completion.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_TO_E when outLen is too small, MP_READ_E on input size errors,
 *          MP_VAL when the modulus is even or the state is not recognised,
 *          or an error from the inner exponentiation.
 */
int sp_RsaPrivate_2048_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen,
    const mp_int* dm, const mp_int* mm, byte* out, word32* outLen)
{
    int err = MP_WOULDBLOCK;
    sp_2048_RsaPrivate_nb_ctx* ctx =
        (sp_2048_RsaPrivate_nb_ctx*)sp_ctx->data;

    /* Compile-time check: the array size is negative, and compilation fails,
     * unless the state is smaller than *sp_ctx. */
    typedef char ctx_size_test[sizeof(sp_2048_RsaPrivate_nb_ctx)
        >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT: validate, convert inputs to sp_digit form */
        if (*outLen < 256U) {
            err = MP_TO_E;
            break;
        }
        if (mp_count_bits(dm) > 2048) {
            err = MP_READ_E;
            break;
        }
        if (inLen > 256U) {
            err = MP_READ_E;
            break;
        }
        if (mp_count_bits(mm) != 2048) {
            err = MP_READ_E;
            break;
        }
        if (mp_iseven(mm)) {
            err = MP_VAL;
            break;
        }
        sp_2048_from_bin(ctx->a, 34, in, inLen);
        sp_2048_from_mp(ctx->d, 34, dm);
        sp_2048_from_mp(ctx->m, 34, mm);
        ctx->state = 1;
        break;
    case 1: /* MODEXP: drive inner mod_exp state machine one step */
        err = sp_2048_mod_exp_34_nb(&ctx->mod_exp_ctx, ctx->a,
            ctx->a, ctx->d, 2048, ctx->m, 0);
        if (err == MP_OKAY) {
            ctx->state = 2;
        }
        break;
    case 2: /* TO_BIN: write big-endian result */
        sp_2048_to_bin_34(ctx->a, out);
        *outLen = 256;
        ctx->state = 3;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed or has been corrupted - fail instead
              * of reporting MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 3) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the private exponent and all intermediates. This
     * also resets state to 0 ready for the next operation. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }
    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP &&
        * (SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM) */
#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP)
/* State for sp_ModExp_2048_nb(), overlaid on sp_ctx->data. */
typedef struct sp_2048_ModExp_nb_ctx {
    int state;                          /* Next step; 0 starts an operation. */
    sp_2048_mod_exp_34_ctx mod_exp_ctx; /* State of the inner exponentiation. */
    sp_digit b[34 * 2];                 /* Base on input, result on output. */
    sp_digit e[34];                     /* Exponent. */
    sp_digit m[34];                     /* Modulus. */
    int expBits;                        /* Number of bits in the exponent. */
} sp_2048_ModExp_nb_ctx;

/* Non-blocking modular exponentiation for Diffie-Hellman (mp_int form).
 * Drives sp_2048_mod_exp_34_nb one sub-state per call. base, exp and mod
 * are copied into the context by the first call; every later call works on
 * those copies.
 *
 * sp_ctx   Persistent state buffer; first call must have all bytes zero.
 * base     Base. MP integer (at most 2048 bits).
 * exp      Exponent. MP integer (at most 2048 bits).
 * mod      Modulus. MP integer (exactly 2048 bits, odd).
 * res      Result. MP integer. Written by the last call only.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_READ_E on input size errors, MP_VAL when modulus is even or
 *          the state is not recognised, or an error from the inner
 *          exponentiation or from sp_2048_to_mp().
 */
int sp_ModExp_2048_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base,
    const mp_int* exp, const mp_int* mod, mp_int* res)
{
    int err = MP_WOULDBLOCK;
    sp_2048_ModExp_nb_ctx* ctx =
        (sp_2048_ModExp_nb_ctx*)sp_ctx->data;

    /* Compile-time check: the array size is negative, and compilation fails,
     * unless the state is smaller than *sp_ctx. */
    typedef char ctx_size_test[sizeof(sp_2048_ModExp_nb_ctx)
        >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT: validate, convert inputs to sp_digit form */
        if (mp_count_bits(base) > 2048) {
            err = MP_READ_E;
            break;
        }
        ctx->expBits = mp_count_bits(exp);
        if (ctx->expBits > 2048) {
            err = MP_READ_E;
            break;
        }
        if (mp_count_bits(mod) != 2048) {
            err = MP_READ_E;
            break;
        }
        if (mp_iseven(mod)) {
            err = MP_VAL;
            break;
        }
        sp_2048_from_mp(ctx->b, 34, base);
        sp_2048_from_mp(ctx->e, 34, exp);
        sp_2048_from_mp(ctx->m, 34, mod);
        ctx->state = 1;
        break;
    case 1: /* MODEXP: drive inner mod_exp state machine one step */
        err = sp_2048_mod_exp_34_nb(&ctx->mod_exp_ctx, ctx->b,
            ctx->b, ctx->e, ctx->expBits, ctx->m, 0);
        if (err == MP_OKAY) {
            ctx->state = 2;
        }
        break;
    case 2: /* TO_MP: convert result; a failure here is returned as is */
        err = sp_2048_to_mp(ctx->b, res);
        ctx->state = 3;
        break;
    default: /* Context was not zeroed or has been corrupted - fail instead
              * of reporting MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 3) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the exponent and all intermediates. This also
     * resets state to 0 ready for the next operation. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }
    return err;
}
#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL &&
        * !WOLFSSL_SP_FAST_MODEXP */
#ifdef WOLFSSL_HAVE_SP_DH
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* State for sp_DhExp_2048_nb(), overlaid on sp_ctx->data. */
typedef struct sp_2048_DhExp_nb_ctx {
    int state;                          /* Next step; 0 starts an operation. */
    sp_2048_mod_exp_34_ctx mod_exp_ctx; /* State of the inner exponentiation. */
    sp_digit b[34 * 2];                 /* Base on input, result on output. */
    sp_digit e[34];                     /* Exponent. */
    sp_digit m[34];                     /* Modulus. */
    word32 expLen;                      /* Exponent length in bytes. */
} sp_2048_DhExp_nb_ctx;

/* Non-blocking Diffie-Hellman modular exponentiation.
 * Computes base^exp mod mod where base and exp are byte strings; suitable
 * for the TLS path where otherPub is already a byte buffer.
 * The inputs are copied into the context by the first call; every later
 * call works on those copies.
 *
 * sp_ctx   Persistent state buffer; first call must have all bytes zero.
 * base     Base bytes (other party's public key).
 * baseSz   Length, in bytes, of base (max 256).
 * exp      Exponent bytes (our private key).
 * expLen   Length, in bytes, of exp (max 256).
 * mod      Modulus. MP integer (must remain valid until first call returns).
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 256 bytes long. Written by the last call only.
 * outLen   Length, in bytes, of exponentiation result with leading zero
 *          bytes removed. Written by the last call only.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_READ_E when baseSz or expLen is too large or mod is not 2048
 *          bits, MP_VAL when mod is even or the state is not recognised, or
 *          an error from the inner exponentiation.
 */
int sp_DhExp_2048_nb(sp_dh_ctx_t* sp_ctx, const byte* base, word32 baseSz,
    const byte* exp, word32 expLen, const mp_int* mod, byte* out,
    word32* outLen)
{
    int err = MP_WOULDBLOCK;
    word32 i;
    sp_2048_DhExp_nb_ctx* ctx =
        (sp_2048_DhExp_nb_ctx*)sp_ctx->data;

    /* Compile-time check: the array size is negative, and compilation fails,
     * unless the state is smaller than *sp_ctx. */
    typedef char ctx_size_test[sizeof(sp_2048_DhExp_nb_ctx)
        >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT: validate, convert inputs to sp_digit form */
        if (baseSz > 256U) {
            err = MP_READ_E;
            break;
        }
        if (expLen > 256U) {
            err = MP_READ_E;
            break;
        }
        if (mp_count_bits(mod) != 2048) {
            err = MP_READ_E;
            break;
        }
        if (mp_iseven(mod)) {
            err = MP_VAL;
            break;
        }
        sp_2048_from_bin(ctx->b, 34, base, baseSz);
        sp_2048_from_bin(ctx->e, 34, exp, expLen);
        sp_2048_from_mp(ctx->m, 34, mod);
        ctx->expLen = expLen;
        ctx->state = 1;
        break;
    case 1: /* MODEXP: drive inner mod_exp state machine one step */
        err = sp_2048_mod_exp_34_nb(&ctx->mod_exp_ctx, ctx->b,
            ctx->b, ctx->e, ctx->expLen * 8U, ctx->m, 0);
        if (err == MP_OKAY) {
            ctx->state = 2;
        }
        break;
    case 2: /* TO_BIN: write result and strip leading zero bytes */
        sp_2048_to_bin_34(ctx->b, out);
        *outLen = 256;
        for (i = 0; i < 256U && out[i] == 0U; i++) {
            /* skip leading zero bytes */
        }
        *outLen -= i;
        XMEMMOVE(out, out + i, *outLen);
        ctx->state = 3;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed or has been corrupted - fail instead
              * of reporting MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 3) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the private exponent and all intermediates. This
     * also resets state to 0 ready for the next operation. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }
    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
#endif /* WOLFSSL_HAVE_SP_DH */
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* Persistent state for sp_2048_mod_exp_36_nb(). Must be all zero before the
 * first call. t[] and norm point into td, so the structure must not be
 * copied or moved while an operation is in progress.
 */
typedef struct sp_2048_mod_exp_36_ctx {
    int state;              /* Next step to run; 0 starts an operation. */
    sp_digit td[3 * 72];    /* Backing store for the three temporaries. */
    sp_digit* t[3];         /* t[0], t[1]: working values; t[2]: scratch. */
    sp_digit* norm;         /* Alias of t[0]. */
    sp_digit mp;            /* Value produced by sp_2048_mont_setup(). */
    sp_digit n;             /* Exponent limb currently being consumed. */
    int i;                  /* Index of the next exponent limb to load. */
    int c;                  /* Bits of n still to be processed. */
    byte y;                 /* Exponent bit currently being processed. */
    int reduceA;            /* reduceA as passed to the first call. */
    int bits;               /* bits as passed to the first call. */
} sp_2048_mod_exp_36_ctx;

/* Non-blocking modular exponentiation: r = a^e mod m.
 * State machine driven by ctx; each call advances one Montgomery op, one
 * copy or one bit-extract step, then returns MP_WOULDBLOCK until the final
 * reduction completes.
 *
 * ctx      Persistent state; must be all zero on the first call.
 * r        Result. Written by the final step only (2 * 36 digits).
 * a        Base. Read by the step that follows setup.
 * e        Exponent. Read throughout the bit loop.
 * bits     Number of bits in the exponent. Captured on the first call.
 * m        Modulus. Read by every arithmetic step.
 * reduceA  Non-zero to reduce a modulo m first. Captured on the first call.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_VAL when bits is 0 or the state is not recognised, or the
 *          error returned by sp_2048_mod_36().
 */
static int sp_2048_mod_exp_36_nb(sp_2048_mod_exp_36_ctx* ctx,
    sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
    const sp_digit* m, int reduceA)
{
    int err = MP_WOULDBLOCK;
    int j;

    switch (ctx->state) {
    case 0: /* INIT: layout td, mont_setup, mont_norm */
        if (bits == 0) {
            err = MP_VAL;
            break;
        }
        for (j = 0; j < 3; j++) {
            ctx->t[j] = ctx->td + (j * 36 * 2);
            XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 36U * 2U);
        }
        ctx->norm = ctx->t[0];
        ctx->mp = 1;
        ctx->reduceA = reduceA;
        ctx->bits = bits;
        sp_2048_mont_setup(m, &ctx->mp);
        sp_2048_mont_norm_36(ctx->norm, m);
        ctx->state = 1;
        break;
    case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */
        if (ctx->reduceA != 0) {
            err = sp_2048_mod_36(ctx->t[1], a, m);
            if (err != MP_OKAY) {
                break;
            }
        }
        else {
            XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 36U);
        }
        ctx->state = 2;
        break;
    case 2: /* NORM_MUL: t[1] = t[1] * norm */
        sp_2048_mul_36(ctx->t[1], ctx->t[1], ctx->norm);
        ctx->state = 3;
        break;
    case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm and is left
             * untouched as the accumulator for the bit loop. */
        err = sp_2048_mod_36(ctx->t[1], ctx->t[1], m);
        if (err != MP_OKAY) {
            break;
        }
        ctx->state = 4;
        break;
    case 4: /* BIT_INIT: start at the limb holding bit (bits - 1) so that e
             * is not read past its end when bits is an exact multiple of
             * the 57-bit limb width. c = ((bits - 1) % 57) + 1 is in
             * [1, 57], so the shift (57 - c) is in [0, 56]. */
        ctx->i = (ctx->bits - 1) / 57;
        ctx->c = ((ctx->bits - 1) % 57) + 1;
        ctx->n = e[ctx->i--] << (57 - ctx->c);
        ctx->state = 5;
        break;
    case 5: /* BIT_NEXT: refill on limb boundary, peel one exponent bit */
        if (ctx->c == 0) {
            if (ctx->i == -1) {
                /* Every exponent limb has been consumed. */
                ctx->state = 10;
                break;
            }
            ctx->n = e[ctx->i--];
            ctx->c = 57;
        }
        ctx->y = (byte)((ctx->n >> 56) & 1);
        ctx->n <<= 1;
        ctx->state = 6;
        break;
    case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */
        sp_2048_mont_mul_36(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1],
            m, ctx->mp);
        ctx->state = 7;
        break;
    case 7: /* COPY_OUT: t[y] -> t[2]. The source address is built with
             * addr_mask rather than by branching on the exponent bit. */
        XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                                   ((size_t)ctx->t[1] & addr_mask[ctx->y])),
                sizeof(sp_digit) * 36 * 2);
        ctx->state = 8;
        break;
    case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */
        sp_2048_mont_sqr_36(ctx->t[2], ctx->t[2], m, ctx->mp);
        ctx->state = 9;
        break;
    case 9: /* COPY_BACK: t[2] -> t[y], same masked addressing; advance bit */
        XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                        ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2],
                sizeof(sp_digit) * 36 * 2);
        ctx->c--;
        ctx->state = 5;
        break;
    case 10: /* REDUCE: final mont_reduce + cond_sub. n is reused to hold
              * the comparison result. */
        sp_2048_mont_reduce_36(ctx->t[0], m, ctx->mp);
        ctx->n = sp_2048_cmp_36(ctx->t[0], m);
        sp_2048_cond_sub_36(ctx->t[0], ctx->t[0], m,
            (sp_digit)~(ctx->n >> 63));
        XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 36 * 2);
        ctx->state = 11;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed, is being reused after completion or
              * has been corrupted - fail instead of reporting
              * MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 11) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the whole context, not just td, so the exponent
     * bits held in n and y do not outlive the operation. This also resets
     * state to 0. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* Persistent state for sp_3072_mod_exp_52_nb(). Must be all zero before the
 * first call. t[] and norm point into td, so the structure must not be
 * copied or moved while an operation is in progress.
 */
typedef struct sp_3072_mod_exp_52_ctx {
    int state;              /* Next step to run; 0 starts an operation. */
    sp_digit td[3 * 104];   /* Backing store for the three temporaries. */
    sp_digit* t[3];         /* t[0], t[1]: working values; t[2]: scratch. */
    sp_digit* norm;         /* Alias of t[0]. */
    sp_digit mp;            /* Value produced by sp_3072_mont_setup(). */
    sp_digit n;             /* Exponent limb currently being consumed. */
    int i;                  /* Index of the next exponent limb to load. */
    int c;                  /* Bits of n still to be processed. */
    byte y;                 /* Exponent bit currently being processed. */
    int reduceA;            /* reduceA as passed to the first call. */
    int bits;               /* bits as passed to the first call. */
} sp_3072_mod_exp_52_ctx;

/* Non-blocking modular exponentiation: r = a^e mod m.
 * State machine driven by ctx; each call advances one Montgomery op, one
 * copy or one bit-extract step, then returns MP_WOULDBLOCK until the final
 * reduction completes.
 *
 * ctx      Persistent state; must be all zero on the first call.
 * r        Result. Written by the final step only (2 * 52 digits).
 * a        Base. Read by the step that follows setup.
 * e        Exponent. Read throughout the bit loop.
 * bits     Number of bits in the exponent. Captured on the first call.
 * m        Modulus. Read by every arithmetic step.
 * reduceA  Non-zero to reduce a modulo m first. Captured on the first call.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_VAL when bits is 0 or the state is not recognised, or the
 *          error returned by sp_3072_mod_52().
 */
static int sp_3072_mod_exp_52_nb(sp_3072_mod_exp_52_ctx* ctx,
    sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
    const sp_digit* m, int reduceA)
{
    int err = MP_WOULDBLOCK;
    int j;

    switch (ctx->state) {
    case 0: /* INIT: layout td, mont_setup, mont_norm */
        if (bits == 0) {
            err = MP_VAL;
            break;
        }
        for (j = 0; j < 3; j++) {
            ctx->t[j] = ctx->td + (j * 52 * 2);
            XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 52U * 2U);
        }
        ctx->norm = ctx->t[0];
        ctx->mp = 1;
        ctx->reduceA = reduceA;
        ctx->bits = bits;
        sp_3072_mont_setup(m, &ctx->mp);
        sp_3072_mont_norm_52(ctx->norm, m);
        ctx->state = 1;
        break;
    case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */
        if (ctx->reduceA != 0) {
            err = sp_3072_mod_52(ctx->t[1], a, m);
            if (err != MP_OKAY) {
                break;
            }
        }
        else {
            XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 52U);
        }
        ctx->state = 2;
        break;
    case 2: /* NORM_MUL: t[1] = t[1] * norm */
        sp_3072_mul_52(ctx->t[1], ctx->t[1], ctx->norm);
        ctx->state = 3;
        break;
    case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm and is left
             * untouched as the accumulator for the bit loop. */
        err = sp_3072_mod_52(ctx->t[1], ctx->t[1], m);
        if (err != MP_OKAY) {
            break;
        }
        ctx->state = 4;
        break;
    case 4: /* BIT_INIT: start at the limb holding bit (bits - 1) so that e
             * is not read past its end when bits is an exact multiple of
             * the 60-bit limb width. c = ((bits - 1) % 60) + 1 is in
             * [1, 60], so the shift (60 - c) is in [0, 59]. */
        ctx->i = (ctx->bits - 1) / 60;
        ctx->c = ((ctx->bits - 1) % 60) + 1;
        ctx->n = e[ctx->i--] << (60 - ctx->c);
        ctx->state = 5;
        break;
    case 5: /* BIT_NEXT: refill on limb boundary, peel one exponent bit */
        if (ctx->c == 0) {
            if (ctx->i == -1) {
                /* Every exponent limb has been consumed. */
                ctx->state = 10;
                break;
            }
            ctx->n = e[ctx->i--];
            ctx->c = 60;
        }
        ctx->y = (byte)((ctx->n >> 59) & 1);
        ctx->n <<= 1;
        ctx->state = 6;
        break;
    case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */
        sp_3072_mont_mul_52(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1],
            m, ctx->mp);
        ctx->state = 7;
        break;
    case 7: /* COPY_OUT: t[y] -> t[2]. The source address is built with
             * addr_mask rather than by branching on the exponent bit. */
        XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                                   ((size_t)ctx->t[1] & addr_mask[ctx->y])),
                sizeof(sp_digit) * 52 * 2);
        ctx->state = 8;
        break;
    case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */
        sp_3072_mont_sqr_52(ctx->t[2], ctx->t[2], m, ctx->mp);
        ctx->state = 9;
        break;
    case 9: /* COPY_BACK: t[2] -> t[y], same masked addressing; advance bit */
        XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) +
                        ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2],
                sizeof(sp_digit) * 52 * 2);
        ctx->c--;
        ctx->state = 5;
        break;
    case 10: /* REDUCE: final mont_reduce + cond_sub. n is reused to hold
              * the comparison result. */
        sp_3072_mont_reduce_52(ctx->t[0], m, ctx->mp);
        ctx->n = sp_3072_cmp_52(ctx->t[0], m);
        sp_3072_cond_sub_52(ctx->t[0], ctx->t[0], m,
            (sp_digit)~(ctx->n >> 63));
        XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 52 * 2);
        ctx->state = 11;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed, is being reused after completion or
              * has been corrupted - fail instead of reporting
              * MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 11) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the whole context, not just td, so the exponent
     * bits held in n and y do not outlive the operation. This also resets
     * state to 0. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \
    !defined(WOLFSSL_SP_FAST_MODEXP)
/* State for sp_RsaPublic_3072_nb(), overlaid on sp_ctx->data. */
typedef struct sp_3072_RsaPublic_nb_ctx {
    int state;                          /* Next step; 0 starts an operation. */
    sp_3072_mod_exp_52_ctx mod_exp_ctx; /* State of the inner exponentiation. */
    sp_digit a[52 * 2];                 /* Base on input, result on output. */
    sp_digit m[52];                     /* Modulus. */
    sp_digit e[2];                      /* Public exponent. */
    int e_bits;                         /* Number of bits in the exponent. */
} sp_3072_RsaPublic_nb_ctx;

/* Non-blocking RSA public key operation. State machine driven by sp_ctx;
 * each call advances either an input/output conversion step or one
 * sub-state of the inner modular exponentiation, returning MP_WOULDBLOCK
 * until the operation completes. in, em and mm are copied into the context
 * by the first call; every later call works on those copies.
 *
 * sp_ctx   Persistent state buffer; first call must have all bytes zero.
 * in       Array of bytes representing the number to exponentiate, base.
 * inLen    Number of bytes in base (max 384).
 * em       Public exponent (at most 64 bits, non-zero).
 * mm       Modulus (exactly 3072 bits, odd).
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 384 bytes long. Written by the last call only.
 * outLen   On the first call, the size of out. Set to 384 on completion.
 * returns  MP_WOULDBLOCK while more work remains, MP_OKAY on completion,
 *          MP_TO_E when outLen is too small, MP_READ_E on input size errors,
 *          MP_VAL when the modulus is even or the state is not recognised,
 *          MP_EXPTMOD_E when the exponent is zero, or an error from the
 *          inner exponentiation.
 */
int sp_RsaPublic_3072_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen,
    const mp_int* em, const mp_int* mm, byte* out, word32* outLen)
{
    int err = MP_WOULDBLOCK;
    sp_3072_RsaPublic_nb_ctx* ctx =
        (sp_3072_RsaPublic_nb_ctx*)sp_ctx->data;

    /* Compile-time check: the array size is negative, and compilation fails,
     * unless the state is smaller than *sp_ctx. */
    typedef char ctx_size_test[sizeof(sp_3072_RsaPublic_nb_ctx)
        >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT: validate, convert inputs to sp_digit form */
        if (*outLen < 384U) {
            err = MP_TO_E;
            break;
        }
        if (mp_count_bits(em) > 64) {
            err = MP_READ_E;
            break;
        }
        if (inLen > 384U) {
            err = MP_READ_E;
            break;
        }
        if (mp_count_bits(mm) != 3072) {
            err = MP_READ_E;
            break;
        }
        if (mp_iseven(mm)) {
            err = MP_VAL;
            break;
        }
        ctx->e_bits = mp_count_bits(em);
        if (ctx->e_bits == 0) {
            err = MP_EXPTMOD_E;
            break;
        }
        sp_3072_from_bin(ctx->a, 52, in, inLen);
        sp_3072_from_mp(ctx->m, 52, mm);
        sp_3072_from_mp(ctx->e, 2, em);
        ctx->state = 1;
        break;
    case 1: /* MODEXP: drive inner mod_exp state machine one step */
        err = sp_3072_mod_exp_52_nb(&ctx->mod_exp_ctx, ctx->a,
            ctx->a, ctx->e, ctx->e_bits, ctx->m, 0);
        if (err == MP_OKAY) {
            ctx->state = 2;
        }
        break;
    case 2: /* TO_BIN: write big-endian result */
        sp_3072_to_bin_52(ctx->a, out);
        *outLen = 384;
        ctx->state = 3;
        err = MP_OKAY;
        break;
    default: /* Context was not zeroed or has been corrupted - fail instead
              * of reporting MP_WOULDBLOCK forever. */
        err = MP_VAL;
        break;
    }

    /* MP_OKAY from an intermediate step only means that step finished. */
    if (err == MP_OKAY && ctx->state != 3) {
        err = MP_WOULDBLOCK;
    }
    /* Done or failed: wipe the context, which also resets state to 0. */
    if (err != MP_WOULDBLOCK) {
        ForceZero(ctx, sizeof(*ctx));
    }
    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */
+ * The CRT path is not supported in non-blocking mode; configure with + * RSA_LOW_MEM or SP_RSA_PRIVATE_EXP_D to enable this entry point. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E (outLen too small), MP_READ_E (bad size), MP_VAL (even mm). + */ +int sp_RsaPrivate_3072_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* dm, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_3072_RsaPrivate_nb_ctx* ctx = + (sp_3072_RsaPrivate_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_3072_RsaPrivate_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (*outLen < 384U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + break; + } + if (inLen > 384U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + sp_3072_from_bin(ctx->a, 52, in, inLen); + sp_3072_from_mp(ctx->d, 52, dm); + sp_3072_from_mp(ctx->m, 52, mm); + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_3072_mod_exp_52_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->d, 3072, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN */ + sp_3072_to_bin_52(ctx->a, out); + *outLen = 384; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + 
return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP && + * (SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM) */ + #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ @@ -8963,6 +9938,91 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif } +#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_3072_ModExp_nb_ctx { + int state; + sp_3072_mod_exp_52_ctx mod_exp_ctx; + sp_digit b[52 * 2]; + sp_digit e[52]; + sp_digit m[52]; + int expBits; +} sp_3072_ModExp_nb_ctx; + +/* Non-blocking modular exponentiation for Diffie-Hellman (mp_int form). + * Drives sp_3072_mod_exp_52_nb one sub-state per call. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_READ_E on input size errors, or MP_VAL when modulus is even. + */ +int sp_ModExp_3072_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res) +{ + int err = MP_WOULDBLOCK; + sp_3072_ModExp_nb_ctx* ctx = + (sp_3072_ModExp_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_3072_ModExp_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + break; + } + ctx->expBits = mp_count_bits(exp); + if (ctx->expBits > 3072) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_3072_from_mp(ctx->b, 52, base); + sp_3072_from_mp(ctx->e, 52, exp); + sp_3072_from_mp(ctx->m, 52, mod); + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_3072_mod_exp_52_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expBits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_MP */ + err = sp_3072_to_mp(ctx->b, res); + ctx->state = 3; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && + * !WOLFSSL_SP_FAST_MODEXP */ + #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 @@ -9159,6 +10219,105 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, } #endif /* WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_DH +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_3072_DhExp_nb_ctx { + int state; + sp_3072_mod_exp_52_ctx mod_exp_ctx; + sp_digit b[52 * 2]; + sp_digit e[52]; + sp_digit m[52]; + word32 expLen; +} sp_3072_DhExp_nb_ctx; + +/* Non-blocking Diffie-Hellman modular exponentiation. + * Computes base^exp mod mod where base and exp are byte strings; suitable + * for the TLS path where otherPub is already a byte buffer. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base bytes (other party's public key). + * baseSz Length, in bytes, of base (max 384). 
+ * exp Exponent bytes (our private key). + * expLen Length, in bytes, of exp (max 384). + * mod Modulus. MP integer (must remain valid until first call returns). + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion. + */ +int sp_DhExp_3072_nb(sp_dh_ctx_t* sp_ctx, const byte* base, word32 baseSz, + const byte* exp, word32 expLen, const mp_int* mod, byte* out, + word32* outLen) +{ + int err = MP_WOULDBLOCK; + word32 i; + sp_3072_DhExp_nb_ctx* ctx = + (sp_3072_DhExp_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_3072_DhExp_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (baseSz > 384U) { + err = MP_READ_E; + break; + } + if (expLen > 384U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_3072_from_bin(ctx->b, 52, base, baseSz); + sp_3072_from_bin(ctx->e, 52, exp, expLen); + sp_3072_from_mp(ctx->m, 52, mod); + ctx->expLen = expLen; + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_3072_mod_exp_52_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expLen * 8U, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN */ + sp_3072_to_bin_52(ctx->b, out); + *outLen = 384; + for (i = 0; i < 384U && out[i] == 0U; i++) { + /* skip leading zero bytes */ + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ +#endif /* 
WOLFSSL_HAVE_SP_DH */ + /* Perform the modular exponentiation for Diffie-Hellman. * * base Base. MP integer. @@ -11689,6 +12848,141 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, return err; #endif } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. + */ +typedef struct sp_3072_mod_exp_54_ctx { + int state; + sp_digit td[3 * 108]; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp; + sp_digit n; + int i; + int c; + byte y; + int reduceA; + int bits; +} sp_3072_mod_exp_54_ctx; + +static int sp_3072_mod_exp_54_nb(sp_3072_mod_exp_54_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 54 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 54U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_3072_mont_setup(m, &ctx->mp); + sp_3072_mont_norm_54(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_3072_mod_54(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 54U); + } + ctx->state = 2; + break; + case 2: /* NORM_MUL: t[1] = t[1] * norm */ + sp_3072_mul_54(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. 
*/ + err = sp_3072_mod_54(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 57 + 1 keeps the shift (57 - c) in [0, 56]. */ + ctx->i = (ctx->bits - 1) / 57; + ctx->c = ((ctx->bits - 1) % 57) + 1; + ctx->n = e[ctx->i--] << (57 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 57; + } + ctx->y = (byte)((ctx->n >> 56) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_3072_mont_mul_54(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 54 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_3072_mont_sqr_54(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 54 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_3072_mont_reduce_54(ctx->t[0], m, ctx->mp); + ctx->n = sp_3072_cmp_54(ctx->t[0], m); + sp_3072_cond_sub_54(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 63)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 54 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { + err = MP_WOULDBLOCK; + } + if (err != 
MP_WOULDBLOCK) { + ForceZero(ctx->td, sizeof(ctx->td)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ @@ -14653,11 +15947,146 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, XMEMCPY(r, rt, sizeof(sp_digit) * 140); } - SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + return err; +#endif +} +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. + */ +typedef struct sp_4096_mod_exp_70_ctx { + int state; + sp_digit td[3 * 140]; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp; + sp_digit n; + int i; + int c; + byte y; + int reduceA; + int bits; +} sp_4096_mod_exp_70_ctx; + +static int sp_4096_mod_exp_70_nb(sp_4096_mod_exp_70_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 70 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 70U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_4096_mont_setup(m, &ctx->mp); + sp_4096_mont_norm_70(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_4096_mod_70(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 70U); + } + ctx->state = 2; + break; + case 2: /* NORM_MUL: t[1] = 
t[1] * norm */ + sp_4096_mul_70(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. */ + err = sp_4096_mod_70(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 59 + 1 keeps the shift (59 - c) in [0, 58]. */ + ctx->i = (ctx->bits - 1) / 59; + ctx->c = ((ctx->bits - 1) % 59) + 1; + ctx->n = e[ctx->i--] << (59 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 59; + } + ctx->y = (byte)((ctx->n >> 58) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_4096_mont_mul_70(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 70 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_4096_mont_sqr_70(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 70 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_4096_mont_reduce_70(ctx->t[0], m, ctx->mp); + ctx->n = sp_4096_cmp_70(ctx->t[0], m); + 
sp_4096_cond_sub_70(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 63)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 70 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx->td, sizeof(ctx->td)); + } return err; -#endif } +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_RSA @@ -14861,6 +16290,105 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, #endif /* WOLFSSL_SP_SMALL */ } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_4096_RsaPublic_nb_ctx { + int state; + sp_4096_mod_exp_70_ctx mod_exp_ctx; + sp_digit a[70 * 2]; + sp_digit m[70]; + sp_digit e[2]; + int e_bits; +} sp_4096_RsaPublic_nb_ctx; + +/* Non-blocking RSA public key operation. State machine driven by sp_ctx; + * each call advances either an input/output conversion step or one + * sub-state of the inner modular exponentiation, returning MP_WOULDBLOCK + * until the operation completes. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E/MP_READ_E (bad sizes), MP_VAL (even mm), MP_EXPTMOD_E (em=0). 
+ */ +int sp_RsaPublic_4096_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* em, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_4096_RsaPublic_nb_ctx* ctx = + (sp_4096_RsaPublic_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_4096_RsaPublic_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT: validate, convert inputs to sp_digit form */ + if (*outLen < 512U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(em) > 64) { + err = MP_READ_E; + break; + } + if (inLen > 512U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + ctx->e_bits = mp_count_bits(em); + if (ctx->e_bits == 0) { + err = MP_EXPTMOD_E; + break; + } + sp_4096_from_bin(ctx->a, 70, in, inLen); + sp_4096_from_mp(ctx->m, 70, mm); + sp_4096_from_mp(ctx->e, 2, em); + ctx->state = 1; + break; + case 1: /* MODEXP: drive inner mod_exp state machine one step */ + err = sp_4096_mod_exp_70_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->e, ctx->e_bits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN: write big-endian result */ + sp_4096_to_bin_70(ctx->a, out); + *outLen = 512; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ + #ifndef WOLFSSL_RSA_PUBLIC_ONLY #if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) #endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ @@ -15155,6 +16683,100 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#if defined(WOLFSSL_SP_NONBLOCK) && 
defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) && \ + (defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)) +typedef struct sp_4096_RsaPrivate_nb_ctx { + int state; + sp_4096_mod_exp_70_ctx mod_exp_ctx; + sp_digit a[70 * 2]; + sp_digit m[70]; + sp_digit d[70]; +} sp_4096_RsaPrivate_nb_ctx; + +/* Non-blocking RSA private key operation - D-only path. + * The CRT path is not supported in non-blocking mode; configure with + * RSA_LOW_MEM or SP_RSA_PRIVATE_EXP_D to enable this entry point. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_TO_E (outLen too small), MP_READ_E (bad size), MP_VAL (even mm). + */ +int sp_RsaPrivate_4096_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, word32 inLen, + const mp_int* dm, const mp_int* mm, byte* out, word32* outLen) +{ + int err = MP_WOULDBLOCK; + sp_4096_RsaPrivate_nb_ctx* ctx = + (sp_4096_RsaPrivate_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_4096_RsaPrivate_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (*outLen < 512U) { + err = MP_TO_E; + break; + } + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + break; + } + if (inLen > 512U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mm)) { + err = MP_VAL; + break; + } + sp_4096_from_bin(ctx->a, 70, in, inLen); + sp_4096_from_mp(ctx->d, 70, dm); + sp_4096_from_mp(ctx->m, 70, mm); + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_4096_mod_exp_70_nb(&ctx->mod_exp_ctx, ctx->a, + ctx->a, ctx->d, 4096, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN */ + sp_4096_to_bin_70(ctx->a, out); + *outLen = 512; + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP && + * (SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM) */ + #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ @@ -15326,6 +16948,91 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif } +#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_4096_ModExp_nb_ctx { + int state; + sp_4096_mod_exp_70_ctx mod_exp_ctx; + sp_digit b[70 * 2]; + sp_digit e[70]; + sp_digit m[70]; + int expBits; +} sp_4096_ModExp_nb_ctx; + +/* Non-blocking modular exponentiation for Diffie-Hellman (mp_int form). + * Drives sp_4096_mod_exp_70_nb one sub-state per call. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base. MP integer. + * exp Exponent. MP integer. 
+ * mod Modulus. MP integer. + * res Result. MP integer. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion, + * MP_READ_E on input size errors, or MP_VAL when modulus is even. + */ +int sp_ModExp_4096_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res) +{ + int err = MP_WOULDBLOCK; + sp_4096_ModExp_nb_ctx* ctx = + (sp_4096_ModExp_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_4096_ModExp_nb_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + break; + } + ctx->expBits = mp_count_bits(exp); + if (ctx->expBits > 4096) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_4096_from_mp(ctx->b, 70, base); + sp_4096_from_mp(ctx->e, 70, exp); + sp_4096_from_mp(ctx->m, 70, mod); + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_4096_mod_exp_70_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expBits, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_MP */ + err = sp_4096_to_mp(ctx->b, res); + ctx->state = 3; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && + * !WOLFSSL_SP_FAST_MODEXP */ + #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 @@ -15522,6 +17229,105 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, } #endif /* WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_DH +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +typedef struct sp_4096_DhExp_nb_ctx { + int state; + sp_4096_mod_exp_70_ctx mod_exp_ctx; 
+ sp_digit b[70 * 2]; + sp_digit e[70]; + sp_digit m[70]; + word32 expLen; +} sp_4096_DhExp_nb_ctx; + +/* Non-blocking Diffie-Hellman modular exponentiation. + * Computes base^exp mod mod where base and exp are byte strings; suitable + * for the TLS path where otherPub is already a byte buffer. + * + * sp_ctx Persistent state buffer; first call must have all bytes zero. + * base Base bytes (other party's public key). + * baseSz Length, in bytes, of base (max 512). + * exp Exponent bytes (our private key). + * expLen Length, in bytes, of exp (max 512). + * mod Modulus. MP integer (must remain valid until first call returns). + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns MP_WOULDBLOCK while more work remains, MP_OKAY on completion. + */ +int sp_DhExp_4096_nb(sp_dh_ctx_t* sp_ctx, const byte* base, word32 baseSz, + const byte* exp, word32 expLen, const mp_int* mod, byte* out, + word32* outLen) +{ + int err = MP_WOULDBLOCK; + word32 i; + sp_4096_DhExp_nb_ctx* ctx = + (sp_4096_DhExp_nb_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_4096_DhExp_nb_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (baseSz > 512U) { + err = MP_READ_E; + break; + } + if (expLen > 512U) { + err = MP_READ_E; + break; + } + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + break; + } + if (mp_iseven(mod)) { + err = MP_VAL; + break; + } + sp_4096_from_bin(ctx->b, 70, base, baseSz); + sp_4096_from_bin(ctx->e, 70, exp, expLen); + sp_4096_from_mp(ctx->m, 70, mod); + ctx->expLen = expLen; + ctx->state = 1; + break; + case 1: /* MODEXP */ + err = sp_4096_mod_exp_70_nb(&ctx->mod_exp_ctx, ctx->b, + ctx->b, ctx->e, ctx->expLen * 8U, ctx->m, 0); + if (err == MP_WOULDBLOCK) { + break; + } + if (err != MP_OKAY) { + break; + } + ctx->state = 2; + break; + case 2: /* TO_BIN */ + sp_4096_to_bin_70(ctx->b, out); + *outLen = 512; + for (i = 0; i < 512U && out[i] == 0U; i++) { + /* skip leading zero bytes */ + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + ctx->state = 3; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 3) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx, sizeof(*ctx)); + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ +#endif /* WOLFSSL_HAVE_SP_DH */ + #endif /* WOLFSSL_HAVE_SP_DH | (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) */ #else @@ -18153,6 +19959,141 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, return err; #endif } +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +/* Non-blocking modular exponentiation. State machine driven by sp_ctx; + * each call advances one Montgomery op or one bit-extract step then + * returns MP_WOULDBLOCK until the final reduction completes. 
+ */ +typedef struct sp_4096_mod_exp_78_ctx { + int state; + sp_digit td[3 * 156]; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp; + sp_digit n; + int i; + int c; + byte y; + int reduceA; + int bits; +} sp_4096_mod_exp_78_ctx; + +static int sp_4096_mod_exp_78_nb(sp_4096_mod_exp_78_ctx* ctx, + sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ + int err = MP_WOULDBLOCK; + int j; + + switch (ctx->state) { + case 0: /* INIT: layout td, mont_setup, mont_norm */ + if (bits == 0) { + err = MP_VAL; + break; + } + for (j = 0; j < 3; j++) { + ctx->t[j] = ctx->td + (j * 78 * 2); + XMEMSET(ctx->t[j], 0, sizeof(sp_digit) * 78U * 2U); + } + ctx->norm = ctx->t[0]; + ctx->mp = 1; + ctx->reduceA = reduceA; + ctx->bits = bits; + sp_4096_mont_setup(m, &ctx->mp); + sp_4096_mont_norm_78(ctx->norm, m); + ctx->state = 1; + break; + case 1: /* REDUCE_A: optionally reduce a mod m into t[1] */ + if (ctx->reduceA != 0) { + err = sp_4096_mod_78(ctx->t[1], a, m); + if (err != MP_OKAY) { + break; + } + } + else { + XMEMCPY(ctx->t[1], a, sizeof(sp_digit) * 78U); + } + ctx->state = 2; + break; + case 2: /* NORM_MUL: t[1] = t[1] * norm */ + sp_4096_mul_78(ctx->t[1], ctx->t[1], ctx->norm); + ctx->state = 3; + break; + case 3: /* NORM_MOD: t[1] = t[1] mod m. t[0] aliases norm (= R mod m), + which is the Montgomery form of 1 - leave it as the + accumulator for the bit loop. */ + err = sp_4096_mod_78(ctx->t[1], ctx->t[1], m); + if (err != MP_OKAY) { + break; + } + ctx->state = 4; + break; + case 4: /* BIT_INIT: index the most-significant exponent limb without + reading off the end when bits is an exact multiple of the + limb width. (bits-1) keeps i within the populated range and + c = (bits-1) % 53 + 1 keeps the shift (53 - c) in [0, 52]. 
*/ + ctx->i = (ctx->bits - 1) / 53; + ctx->c = ((ctx->bits - 1) % 53) + 1; + ctx->n = e[ctx->i--] << (53 - ctx->c); + ctx->state = 5; + break; + case 5: /* BIT_NEXT: refill on word boundary, peel one exponent bit */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 10; + break; + } + ctx->n = e[ctx->i--]; + ctx->c = 53; + } + ctx->y = (byte)((ctx->n >> 52) & 1); + ctx->n <<= 1; + ctx->state = 6; + break; + case 6: /* MUL: t[y^1] = t[0] * t[1] in Montgomery form */ + sp_4096_mont_mul_78(ctx->t[ctx->y ^ 1], ctx->t[0], ctx->t[1], + m, ctx->mp); + ctx->state = 7; + break; + case 7: /* COPY_OUT: constant-time copy &t[y] -> t[2] */ + XMEMCPY(ctx->t[2], (void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_digit) * 78 * 2); + ctx->state = 8; + break; + case 8: /* SQR: t[2] = t[2]^2 in Montgomery form */ + sp_4096_mont_sqr_78(ctx->t[2], ctx->t[2], m, ctx->mp); + ctx->state = 9; + break; + case 9: /* COPY_BACK: constant-time copy t[2] -> &t[y]; advance bit */ + XMEMCPY((void*)(((size_t)ctx->t[0] & addr_mask[ctx->y ^ 1]) + + ((size_t)ctx->t[1] & addr_mask[ctx->y])), ctx->t[2], + sizeof(sp_digit) * 78 * 2); + ctx->c--; + ctx->state = 5; + break; + case 10: /* REDUCE: final mont_reduce + cond_sub */ + sp_4096_mont_reduce_78(ctx->t[0], m, ctx->mp); + ctx->n = sp_4096_cmp_78(ctx->t[0], m); + sp_4096_cond_sub_78(ctx->t[0], ctx->t[0], m, + (sp_digit)~(ctx->n >> 63)); + XMEMCPY(r, ctx->t[0], sizeof(sp_digit) * 78 * 2); + ctx->state = 11; + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 11) { + err = MP_WOULDBLOCK; + } + if (err != MP_WOULDBLOCK) { + ForceZero(ctx->td, sizeof(ctx->td)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 361f16d6319..0f077b39676 100644 --- a/wolfcrypt/test/test.c 
+++ b/wolfcrypt/test/test.c @@ -27555,6 +27555,14 @@ static wc_test_ret_t dh_ffdhe_test(WC_RNG *rng, int name) #endif word32 agreeSz = MAX_DH_KEY_SZ; word32 agreeSz2 = MAX_DH_KEY_SZ; +#if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_HAVE_SP_DH) && \ + defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) + DhNb dhNb; + word32 nbAgreeSz; + int nbCount; + wc_test_ret_t nb_ret; +#endif #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) if ((priv == NULL) || @@ -27641,6 +27649,49 @@ static wc_test_ret_t dh_ffdhe_test(WC_RNG *rng, int name) ERROR_OUT(WC_TEST_RET_ENC_NC, done); } +#if defined(WC_DH_NONBLOCK) && defined(WOLFSSL_HAVE_SP_DH) && \ + defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) + nbAgreeSz = MAX_DH_KEY_SZ; + nbCount = 0; + XMEMSET(agree2, 0, MAX_DH_KEY_SZ); + + nb_ret = wc_DhSetNonBlock(key, &dhNb); + if (nb_ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(nb_ret), done); + + do { + nb_ret = wc_DhAgree(key, agree2, &nbAgreeSz, priv, privSz, + pub2, pubSz2); + nbCount++; + #if defined(WOLFSSL_ASYNC_CRYPT) + /* When async crypt is enabled, the SW shim returns WC_PENDING_E + * on the first call (init phase) and wc_AsyncWait drives the SP + * non-block state machine to completion. wc_AsyncSimulate + * translates each per-yield MP_WOULDBLOCK from sp_DhExp_*_nb + * into WC_PENDING_E internally so the wait loop polls until the + * operation finishes. 
*/ + if (nb_ret == WC_PENDING_E) { + nb_ret = wc_AsyncWait(nb_ret, &key->asyncDev, + WC_ASYNC_FLAG_NONE); + } + #endif + } while (nb_ret == MP_WOULDBLOCK || nb_ret == WC_PENDING_E); + if (nb_ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(nb_ret), done); + +#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_DEBUG_NONBLOCK) + printf("DH non-block agree: %d times\n", nbCount); +#endif + + if (nbAgreeSz != agreeSz || XMEMCMP(agree, agree2, agreeSz)) + ERROR_OUT(WC_TEST_RET_ENC_NC, done); + + nb_ret = wc_DhSetNonBlock(key, NULL); + if (nb_ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(nb_ret), done); +#endif /* WC_DH_NONBLOCK + SP nonblock */ + /* wc_DhGeneratePublic_fips() was added in 5.2.3, but some customers are * building with configure scripts that set version to 5.2.1, but with 5.2.3 * wolfCrypt sources. 5.3.0 is used for both fips-v5-ready and v5-kcapi, diff --git a/wolfssl/wolfcrypt/dh.h b/wolfssl/wolfcrypt/dh.h index f61025d36db..11689d36786 100644 --- a/wolfssl/wolfcrypt/dh.h +++ b/wolfssl/wolfcrypt/dh.h @@ -49,6 +49,23 @@ #include #endif +#ifdef WC_DH_NONBLOCK + /* Non-blocking DH currently requires the SP small backend with the + * non-blocking + no-malloc + non-fast-modexp trio. */ + #if !defined(WOLFSSL_HAVE_SP_DH) || !defined(WOLFSSL_SP_NONBLOCK) + #error WC_DH_NONBLOCK requires WOLFSSL_HAVE_SP_DH + WOLFSSL_SP_NONBLOCK + #endif + #if !defined(WOLFSSL_SP_SMALL) + #error WC_DH_NONBLOCK requires WOLFSSL_SP_SMALL + #endif + #if !defined(WOLFSSL_SP_NO_MALLOC) + #error WC_DH_NONBLOCK requires WOLFSSL_SP_NO_MALLOC + #endif + #if defined(WOLFSSL_SP_FAST_MODEXP) + #error WC_DH_NONBLOCK is incompatible with WOLFSSL_SP_FAST_MODEXP + #endif +#endif + typedef struct DhParams { #ifdef HAVE_FFDHE_Q const byte* q; @@ -60,6 +77,20 @@ typedef struct DhParams { word32 g_len; } DhParams; +#ifdef WC_DH_NONBLOCK +/* Non-blocking DH context. Holds the SP wrapper state across yields. + * Caller allocates one DhNb per active operation and binds it to a key + * via wc_DhSetNonBlock(). 
Lifetime must outlast the operation. */ +typedef struct DhNb { +#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) + sp_dh_ctx_t sp_ctx; /* SP wrapper state kept across yields */ +#else + int unused; /* placeholder member; NOTE(review): the WC_DH_NONBLOCK #error checks in this header look like they make this arm unreachable - confirm */ +#endif /* SP DH non-block backend */ +} DhNb; +#endif /* WC_DH_NONBLOCK */ + /* Diffie-Hellman Key */ struct DhKey { mp_int p, g, q; /* group parameters */ @@ -75,6 +106,9 @@ struct DhKey { #ifdef WOLFSSL_KCAPI_DH struct kcapi_handle* handle; #endif +#ifdef WC_DH_NONBLOCK + DhNb* nb; /* non-blocking context, NULL when not in non-block mode */ +#endif /* WC_DH_NONBLOCK */ }; #ifndef WC_DH_TYPE_DEFINED @@ -162,6 +196,10 @@ WOLFSSL_API int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g WOLFSSL_API int wc_DhSetKey_ex(DhKey* key, const byte* p, word32 pSz, const byte* g, word32 gSz, const byte* q, word32 qSz); WOLFSSL_API int wc_DhSetNamedKey(DhKey* key, int name); + +#ifdef WC_DH_NONBLOCK /* presumably stores nb in key->nb; NULL leaves non-block mode - confirm in dh.c */ +WOLFSSL_API int wc_DhSetNonBlock(DhKey* key, DhNb* nb); +#endif /* WC_DH_NONBLOCK */ WOLFSSL_API int wc_DhGetNamedKeyParamSize(int name, word32* p, word32* g, word32* q); WOLFSSL_API word32 wc_DhGetNamedKeyMinSize(int name); diff --git a/wolfssl/wolfcrypt/rsa.h b/wolfssl/wolfcrypt/rsa.h index 8c48fe9e8d8..2afe13a92aa 100644 --- a/wolfssl/wolfcrypt/rsa.h +++ b/wolfssl/wolfcrypt/rsa.h @@ -44,14 +44,36 @@ RSA keys can be used to encrypt, decrypt, sign and verify data. #endif #if defined(WC_RSA_NONBLOCK) - /* enable support for fast math based non-blocking exptmod */ - /* this splits the RSA function into many smaller operations */ - #ifndef USE_FAST_MATH - #error RSA non-blocking mode only supported using fast math + /* Non-blocking RSA splits the operation into many smaller chunks so a + * bare-metal system loop stays responsive. Two backends are supported: + * - TFM fastmath (fp_exptmod_nb), the original implementation. + * - SP small (sp_RsaPublic_NNNN_nb / sp_RsaPrivate_NNNN_nb) when SP + * non-blocking is enabled.
SP needs RSA_LOW_MEM or + * SP_RSA_PRIVATE_EXP_D unless public-only (no CRT in non-block mode) + * and the small / no-malloc / non-blocking trio for the state machine. */ + #if !defined(USE_FAST_MATH) && \ + !(defined(WOLFSSL_HAVE_SP_RSA) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP)) + #error RSA non-blocking mode requires fast math or SP non-blocking #endif - #ifndef TFM_TIMING_RESISTANT + #if defined(USE_FAST_MATH) && !defined(TFM_TIMING_RESISTANT) #error RSA non-blocking mode only supported with timing resistance enabled #endif + #if defined(WOLFSSL_HAVE_SP_RSA) && defined(WOLFSSL_SP_NONBLOCK) + #if !defined(WOLFSSL_SP_SMALL) + #error SP non-blocking RSA requires WOLFSSL_SP_SMALL + #endif + #if !defined(WOLFSSL_SP_NO_MALLOC) + #error SP non-blocking RSA requires WOLFSSL_SP_NO_MALLOC + #endif + #if defined(WOLFSSL_SP_FAST_MODEXP) + #error SP non-blocking RSA is incompatible with WOLFSSL_SP_FAST_MODEXP + #endif + #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && \ + !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) + #error SP non-blocking RSA requires RSA_LOW_MEM (CRT path is unsupported) + #endif + #endif /* WOLFSSL_HAVE_SP_RSA && WOLFSSL_SP_NONBLOCK */ /* RSA bounds check is not supported with RSA non-blocking mode */ #undef NO_RSA_BOUNDS_CHECK @@ -192,8 +214,14 @@ enum { #ifdef WC_RSA_NONBLOCK typedef struct RsaNb { - exptModNb_t exptmod; /* non-block expt_mod */ +#ifdef USE_FAST_MATH + exptModNb_t exptmod; /* TFM non-block expt_mod state */ mp_int tmp; +#endif /* USE_FAST_MATH */ +#if defined(WOLFSSL_HAVE_SP_RSA) && defined(WOLFSSL_SP_NONBLOCK) && \ + defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) + sp_rsa_ctx_t sp_ctx; /* SP non-block wrapper state */ +#endif /* SP RSA non-block backend */ } RsaNb; #endif diff --git a/wolfssl/wolfcrypt/sp.h b/wolfssl/wolfcrypt/sp.h index 9934a60e2a3..eeb1131d5aa 100644 --- a/wolfssl/wolfcrypt/sp.h +++ b/wolfssl/wolfcrypt/sp.h @@ -107,6 +107,40 @@ WOLFSSL_LOCAL int sp_RsaPrivate_4096(const byte* in, word32 inLen, #endif /* HAVE_FIPS_VERSION && HAVE_FIPS_VERSION == 2 &&
!WOLFSSL_SP_ARM[32|64]_ASM */ +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +#ifndef WOLFSSL_SP_NO_2048 +WOLFSSL_LOCAL int sp_RsaPublic_2048_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, + word32 inLen, const mp_int* em, const mp_int* mm, byte* out, + word32* outLen); +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +WOLFSSL_LOCAL int sp_RsaPrivate_2048_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, + word32 inLen, const mp_int* dm, const mp_int* mm, byte* out, + word32* outLen); +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* !WOLFSSL_SP_NO_2048 */ +#ifndef WOLFSSL_SP_NO_3072 +WOLFSSL_LOCAL int sp_RsaPublic_3072_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, + word32 inLen, const mp_int* em, const mp_int* mm, byte* out, + word32* outLen); +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +WOLFSSL_LOCAL int sp_RsaPrivate_3072_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, + word32 inLen, const mp_int* dm, const mp_int* mm, byte* out, + word32* outLen); +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +WOLFSSL_LOCAL int sp_RsaPublic_4096_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, + word32 inLen, const mp_int* em, const mp_int* mm, byte* out, + word32* outLen); +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +WOLFSSL_LOCAL int sp_RsaPrivate_4096_nb(sp_rsa_ctx_t* sp_ctx, const byte* in, + word32 inLen, const mp_int* dm, const mp_int* mm, byte* out, + word32* outLen); +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_SP_4096 */ +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ + #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA) @@ -167,6 +201,31 @@ WOLFSSL_LOCAL int sp_DhExp_4096(const mp_int* base, const byte* exp, #endif /* HAVE_FIPS_VERSION && HAVE_FIPS_VERSION == 2 && !WOLFSSL_SP_ARM[32|64]_ASM */ +#if defined(WOLFSSL_SP_NONBLOCK) && defined(WOLFSSL_SP_SMALL) && \ + !defined(WOLFSSL_SP_FAST_MODEXP) +#ifndef WOLFSSL_SP_NO_2048 +WOLFSSL_LOCAL int sp_DhExp_2048_nb(sp_dh_ctx_t* sp_ctx, const byte* base, + word32 baseSz, const byte* exp,
word32 expLen, const mp_int* mod, + byte* out, word32* outLen); +WOLFSSL_LOCAL int sp_ModExp_2048_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res); +#endif /* !WOLFSSL_SP_NO_2048 */ +#ifndef WOLFSSL_SP_NO_3072 +WOLFSSL_LOCAL int sp_DhExp_3072_nb(sp_dh_ctx_t* sp_ctx, const byte* base, + word32 baseSz, const byte* exp, word32 expLen, const mp_int* mod, + byte* out, word32* outLen); +WOLFSSL_LOCAL int sp_ModExp_3072_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res); +#endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +WOLFSSL_LOCAL int sp_DhExp_4096_nb(sp_dh_ctx_t* sp_ctx, const byte* base, + word32 baseSz, const byte* exp, word32 expLen, const mp_int* mod, + byte* out, word32* outLen); +WOLFSSL_LOCAL int sp_ModExp_4096_nb(sp_dh_ctx_t* sp_ctx, const mp_int* base, + const mp_int* exp, const mp_int* mod, mp_int* res); +#endif /* WOLFSSL_SP_4096 */ +#endif /* WOLFSSL_SP_NONBLOCK && WOLFSSL_SP_SMALL && !WOLFSSL_SP_FAST_MODEXP */ + #endif /* WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC diff --git a/wolfssl/wolfcrypt/sp_int.h b/wolfssl/wolfcrypt/sp_int.h index 874b0870fc2..08b0e4f2c17 100644 --- a/wolfssl/wolfcrypt/sp_int.h +++ b/wolfssl/wolfcrypt/sp_int.h @@ -401,6 +401,37 @@ typedef struct sp_ecc_ctx { } sp_ecc_ctx_t; #endif +#if defined(WOLFSSL_SP_NONBLOCK) && \ + (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) +/* Non-blocking RSA / DH operation contexts. The wrapper state struct + * embeds the inner modexp ctx (which dominates the size) plus per-op + * buffers for the modulus, base/result, and exponent. Sized for the + * largest enabled key size with C64 word layout (worst case).
*/ +#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(NO_RSA) +typedef struct sp_rsa_ctx { + #ifdef WOLFSSL_SP_4096 + XALIGNED(8) byte data[6144]; /* 4096-bit support enabled */ + #elif !defined(WOLFSSL_SP_NO_3072) + XALIGNED(8) byte data[2560]; /* no 4096; 3072-bit enabled */ + #else + XALIGNED(8) byte data[1664]; /* neither 4096 nor 3072 enabled */ + #endif +} sp_rsa_ctx_t; +#endif /* WOLFSSL_HAVE_SP_RSA && !NO_RSA */ + +#if defined(WOLFSSL_HAVE_SP_DH) && !defined(NO_DH) +typedef struct sp_dh_ctx { + #ifdef WOLFSSL_SP_4096 + XALIGNED(8) byte data[6144]; /* 4096-bit support enabled */ + #elif !defined(WOLFSSL_SP_NO_3072) + XALIGNED(8) byte data[2560]; /* no 4096; 3072-bit enabled */ + #else + XALIGNED(8) byte data[1664]; /* neither 4096 nor 3072 enabled */ + #endif +} sp_dh_ctx_t; +#endif /* WOLFSSL_HAVE_SP_DH && !NO_DH */ +#endif /* WOLFSSL_SP_NONBLOCK && (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) */ + #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL) #include