From fe608f5208861b41db0b0528fce49c687a42021d Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 4 May 2026 15:49:07 -0700 Subject: [PATCH] Adds new STM32 Bare support for Hash, SAES/AES and PKA --- wolfcrypt/src/aes.c | 57 +- wolfcrypt/src/ecc.c | 21 +- wolfcrypt/src/port/st/stm32.c | 1337 ++++++++++++++++++++++++++++- wolfcrypt/src/random.c | 10 +- wolfssl/wolfcrypt/port/st/stm32.h | 307 ++++++- wolfssl/wolfcrypt/settings.h | 55 +- 6 files changed, 1767 insertions(+), 20 deletions(-) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index c2c982203fe..d347095ba14 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -227,6 +227,10 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesEncrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal driver handles mutex, clock and key/IV internally. */ + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -367,6 +371,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ @@ -375,6 +380,9 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesDecrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 0); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -521,6 +529,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT */ #endif /* HAVE_AES_DECRYPT */ @@ -5575,7 +5584,34 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #ifdef 
HAVE_AES_CBC #if defined(STM32_CRYPTO) -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_STM32_BARE + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % WC_AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } + #endif + if (sz == 0) { + return 0; + } + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 1); + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % WC_AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } + #endif + if (sz == 0) { + return 0; + } + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 0); + } + #endif /* HAVE_AES_DECRYPT */ +#elif defined(WOLFSSL_STM32U5_DHUK) int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { int ret = 0; @@ -6955,6 +6991,11 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) { + #ifdef WOLFSSL_STM32_BARE + /* CTR per-block transform: ECB-encrypt the counter (passed in + * 'in'); aes.c handles counter increment and XOR with plaintext. 
*/ + return wc_Stm32_Aes_Ecb(aes, out, in, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -7065,6 +7106,7 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) wolfSSL_CryptHwMutexUnLock(); wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } @@ -10141,6 +10183,15 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, authIn, authInSz); #endif +#if defined(WOLFSSL_STM32_BARE) && defined(STM32_CRYPTO) + ret = wc_Stm32_Aes_Gcm(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, + authIn, authInSz, 1 /* enc */); + if (ret != WC_NO_ERR_TRACE(CRYPTOCB_UNAVAILABLE)) + return ret; + /* fall through to SW GCM (still uses HW AES via wc_AesEncrypt) */ +#endif /* WOLFSSL_STM32_BARE && STM32_CRYPTO */ + #ifdef STM32_CRYPTO_AES_GCM return wc_AesGcmEncrypt_STM32( aes, out, in, sz, iv, ivSz, @@ -10870,6 +10921,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, #endif + /* BARE: GCM decrypt always uses SW path (with HW AES blocks via + * wc_AesEncrypt). Encrypt is HW-accelerated above; decrypt + tag + * verification stays in well-tested SW for now. 
*/ + #ifdef STM32_CRYPTO_AES_GCM /* The STM standard peripheral library API's doesn't support partial blocks */ return wc_AesGcmDecrypt_STM32( diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 684a7d4c0d3..4de10656f72 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -286,8 +286,12 @@ ECC Curve Sizes: #if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_SILABS_SE_ACCEL) && \ !defined(WOLFSSL_KCAPI_ECC) && !defined(WOLFSSL_SE050) && \ - !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && !defined(WOLFSSL_STM32_PKA) && \ + !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && \ + !(defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE)) && \ !defined(WOLFSSL_PSOC6_CRYPTO) + /* WOLFSSL_STM32_BARE+PKA still uses the SW ECDSA helper paths + * (sign/verify) since the bare-metal driver only implements ECCMul + * HW; the SP-less SW ECDSA fallback then drives that HW. */ #undef HAVE_ECC_VERIFY_HELPER #define HAVE_ECC_VERIFY_HELPER #endif @@ -6947,7 +6951,12 @@ static int deterministic_sign_helper(const byte* in, word32 inlen, ecc_key* key) #endif /* WOLFSSL_ECDSA_DETERMINISTIC_K || WOLFSSL_ECDSA_DETERMINISTIC_K_VARIANT */ -#if defined(WOLFSSL_STM32_PKA) +/* Under WOLFSSL_STM32_BARE the bare-metal PKA driver implements only + * ECCMul HW (the building block used by ECDH and the SP-less SW ECDSA + * path). HW ECDSA sign/verify is intentionally not wired up in v1 of + * the bare driver -- fall back to the standard SW ECDSA which itself + * calls wc_ecc_mulmod_ex2() (HW-accelerated). 
*/ +#if defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE) int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, ecc_key* key, mp_int *r, mp_int *s) { @@ -8751,7 +8760,8 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash, #ifndef WOLF_CRYPTO_CB_ONLY_ECC -#if !defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_PSOC6_CRYPTO) && \ +#if !(defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE)) && \ + !defined(WOLFSSL_PSOC6_CRYPTO) && \ !defined(WOLF_CRYPTO_CB_ONLY_ECC) static int wc_ecc_check_r_s_range(ecc_key* key, mp_int* r, mp_int* s) { @@ -9267,7 +9277,10 @@ static int ecc_verify_hash(mp_int *r, mp_int *s, const byte* hash, int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, word32 hashlen, int* res, ecc_key* key) { -#if defined(WOLFSSL_STM32_PKA) +#if defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE) + /* See comment above wc_ecc_sign_hash_ex(): BARE uses SW ECDSA + * verify which internally accelerates the scalar muls via the + * bare-metal HW wc_ecc_mulmod_ex2(). */ return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); #elif defined(WOLFSSL_PSOC6_CRYPTO) return psoc6_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index 644b85634f7..77cc99210f5 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -46,6 +46,13 @@ #ifdef WOLFSSL_STM32_PKA #include +#ifdef WOLFSSL_STM32_BARE +/* Bare-metal: CMSIS device header is pulled in by settings.h. The + * PKA_HandleTypeDef and the PKA_ECC / PKA_ECDSA IO typedefs are + * provided by above. The HAL_PKA_* + * entry points are implemented further down in this file under the + * matching guard. 
*/ +#else #if defined(WOLFSSL_STM32L5) #include #include @@ -76,7 +83,16 @@ #else #error Please add the hal_pk.h include #endif +#endif /* !WOLFSSL_STM32_BARE */ + +#ifdef WOLFSSL_STM32_BARE +/* Provide the global PKA handle that the wc_ecc_mulmod_ex2() and + * stm32_ecc_*_hash_ex() paths reference via &hpka. Under HAL builds, + * the application supplies this; under BARE we own it. */ +PKA_HandleTypeDef hpka = { 0 }; +#else extern PKA_HandleTypeDef hpka; +#endif #if !defined(WOLFSSL_STM32_PKA_V2) && defined(PKA_ECC_SCALAR_MUL_IN_B_COEFF) /* PKA hardware like in U5 added coefB and primeOrder */ @@ -92,6 +108,493 @@ extern PKA_HandleTypeDef hpka; #define WOLFSSL_HAVE_ECC_KEY_GET_PRIV #endif #endif /* HAVE_ECC */ + +/* ------------------------------------------------------------------------ + * Bare-metal HAL_PKA_* shims + * + * These provide just the slice of the ST HAL surface that the existing + * wolfssl PKA path (below) calls into. Direct register access only; no + * HAL/StdPeriph dependency. Reference: STM32WBxx_HAL_Driver/Src/ + * stm32wbxx_hal_pka.c (PKA_ECCMul_Set, HAL_PKA_ECCMul, PKA_Process, + * HAL_PKA_ECCMul_GetResult, etc). + * + * Layout-wise this matches the V1 PKA (WB55, WL, MP13). For V2 PKA (H5, + * U5 with PKA, WBA), the additional input slots (coefB, primeOrder, + * pointCheck) live at different word offsets but the start sequence and + * the SR/CLRFR bit names are identical, so the same code path applies + * once WOLFSSL_STM32_PKA_V2 is set (auto-detected via the device-header + * macro PKA_ECC_SCALAR_MUL_IN_B_COEFF below). + * --------------------------------------------------------------------- */ +#ifdef WOLFSSL_STM32_BARE + +/* PKA RAM occupies addresses PKA_BASE+0x400 .. PKA_BASE+0x11F4 on V1 and + * a slightly larger window on V2. The CMSIS device header sizes the + * RAM[] array correctly for the part. 
*/ +#ifndef PKA_RAM_PARAM_END +/* HAL writes one zero word past the last valid byte (microcode reads + * the parameter until it hits a zero word). */ +#define PKA_RAM_PARAM_END(RAM, IDX) \ + do { (RAM)[(IDX)] = 0UL; } while (0) +#endif + +/* Mode encoding constants (from stm32wbxx_hal_pka.h and equivalent). + * Same numeric values across V1 and V2. */ +#ifndef PKA_MODE_ECC_MUL +#define PKA_MODE_ECC_MUL (0x00000020U) +#endif +#ifndef PKA_MODE_ECDSA_VERIFICATION +#define PKA_MODE_ECDSA_VERIFICATION (0x00000026U) +#endif +#ifndef PKA_MODE_ECDSA_SIGNATURE +#define PKA_MODE_ECDSA_SIGNATURE (0x00000024U) +#endif + +/* Number of word slots in the PKA RAM array (per the CMSIS device + * header; e.g. 894 on WB55 V1). */ +#define WC_STM32_PKA_RAM_WORDS \ + (sizeof(((PKA_TypeDef*)0)->RAM) / sizeof(((PKA_TypeDef*)0)->RAM[0])) + +/* Big-endian byte buffer -> PKA RAM (little-endian word order). The + * destination is the PKA RAM slot indexed by 'word_idx'; n is the byte + * count of the source. Mirrors PKA_Memcpy_u8_to_u32 in the HAL. */ +static void wc_stm32_pka_load_be(volatile uint32_t* dst, const uint8_t* src, + uint32_t n) +{ + uint32_t index = 0; + if (dst == NULL || src == NULL) return; + + for (; index < (n / 4U); index++) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16) | + ((uint32_t)src[(n - (index * 4U) - 4U)] << 24); + } + if ((n % 4U) == 1U) { + dst[index] = (uint32_t)src[(n - (index * 4U) - 1U)]; + } + else if ((n % 4U) == 2U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8); + } + else if ((n % 4U) == 3U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16); + } +} + +/* PKA RAM (little-endian word order) -> big-endian byte buffer. 
*/ +static void wc_stm32_pka_read_be(uint8_t* dst, volatile const uint32_t* src, + uint32_t n) +{ + uint32_t i = 0; + if (dst == NULL || src == NULL) return; + + for (; i < (n / 4U); i++) { + uint32_t off = n - 4U - (i * 4U); + dst[off + 3U] = (uint8_t)((src[i] ) & 0xFFU); + dst[off + 2U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[off + 1U] = (uint8_t)((src[i] >> 16) & 0xFFU); + dst[off + 0U] = (uint8_t)((src[i] >> 24) & 0xFFU); + } + if ((n % 4U) == 1U) { + dst[0U] = (uint8_t)(src[i] & 0xFFU); + } + else if ((n % 4U) == 2U) { + dst[1U] = (uint8_t)((src[i] ) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 8) & 0xFFU); + } + else if ((n % 4U) == 3U) { + dst[2U] = (uint8_t)((src[i] ) & 0xFFU); + dst[1U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 16) & 0xFFU); + } +} + +/* Optimal bit-size: bytes * 8 minus the leading-zero count of the MSB + * (matches PKA_GetOptBitSize_u8 in the HAL). */ +static uint32_t wc_stm32_pka_optbits(uint32_t byteNumber, uint8_t msb) +{ + uint32_t pos = 0; + uint32_t v = msb; + while (v != 0U) { + v >>= 1; + pos++; + } + if (byteNumber == 0U) { + return 0U; + } + return ((byteNumber - 1U) * 8U) + pos; +} + +HAL_StatusTypeDef HAL_PKA_Init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) { + return HAL_ERROR; + } + if (hpkah->Instance == NULL) { + hpkah->Instance = PKA; + } + +#ifdef WC_STM32_PKA_CLK_ENABLE + WC_STM32_PKA_CLK_ENABLE(); +#endif + + /* Reset CR, enable the PKA, clear any pending flags. */ + hpkah->Instance->CR = PKA_CR_EN; + hpkah->Instance->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_OK; +} + +/* Lazy one-shot init helper. Safe to call from every entry point. 
*/ +static void wc_stm32_pka_ensure_init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) return; + if (hpkah->Instance == NULL) { + (void)HAL_PKA_Init(hpkah); + } +} + +void HAL_PKA_RAMReset(PKA_HandleTypeDef *hpkah) +{ + uint32_t i; + if (hpkah == NULL || hpkah->Instance == NULL) return; + for (i = 0; i < WC_STM32_PKA_RAM_WORDS; i++) { + hpkah->Instance->RAM[i] = 0UL; + } +} + +/* Generic start-and-poll sequence. */ +static HAL_StatusTypeDef wc_stm32_pka_process(PKA_HandleTypeDef *hpkah, + uint32_t mode) +{ + PKA_TypeDef *p; + uint32_t cr; + + if (hpkah == NULL || hpkah->Instance == NULL) { + return HAL_ERROR; + } + p = hpkah->Instance; + + /* PKA must be enabled before MODE/START are written. */ + if ((p->CR & PKA_CR_EN) == 0U) { + p->CR = PKA_CR_EN; + } + + /* Update the mode field in CR; clear interrupt enables. */ + cr = p->CR; + cr &= ~(PKA_CR_MODE | PKA_CR_PROCENDIE | PKA_CR_RAMERRIE | PKA_CR_ADDRERRIE); + cr |= (mode << PKA_CR_MODE_Pos) & PKA_CR_MODE; + p->CR = cr; + + /* Start the operation. */ + p->CR = cr | PKA_CR_START; + + /* Wait for end-of-operation flag. */ + while ((p->SR & PKA_SR_PROCENDF) == 0U) { + /* No timeout in BARE -- HAL_MAX_DELAY semantics. The hardware + * completes WB55 P-256 scalar multiplication in well under + * 100 ms; if we're here forever something is wrong upstream. */ + } + + /* Clear all status flags. */ + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | PKA_CLRFR_ADDRERRFC; + + return HAL_OK; +} + +HAL_StatusTypeDef HAL_PKA_ECCMul(PKA_HandleTypeDef *hpkah, + PKA_ECCMulInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + /* Scalar 'k' bit length, modulus bit length, and 'a' coefficient + * sign indicator -- exactly as the HAL writes them. 
*/ + RAM[PKA_ECC_SCALAR_MUL_IN_EXP_NB_BITS] = + wc_stm32_pka_optbits(in->scalarMulSize, *(in->scalarMul)); + RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF_SIGN] = in->coefSign; + + /* |a|, modulus p, scalar k, base point (X,Y). */ + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF], + in->coefA, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_K], + in->scalarMul, in->scalarMulSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_K + ((in->scalarMulSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X], + in->pointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y], + in->pointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y + + ((in->modulusSize + 3U) / 4U)); + +#ifdef WOLFSSL_STM32_PKA_V2 + /* V2 PKA also requires the curve order n and 'b' coefficient. 
*/ + if (in->coefB != NULL) { + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_B_COEFF], + in->coefB, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_B_COEFF + + ((in->modulusSize + 3U) / 4U)); + } + if (in->primeOrder != NULL) { + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER], + in->primeOrder, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER + + ((in->modulusSize + 3U) / 4U)); + } +#endif /* WOLFSSL_STM32_PKA_V2 */ + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECC_MUL); +} + +void HAL_PKA_ECCMul_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECCMulOutTypeDef *out) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL || out == NULL) return; + RAM = hpkah->Instance->RAM; + + /* The HAL recomputes the byte size from the saved IN_OP_NB_BITS + * slot. We do the same. */ + size = (RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] + 7U) / 8U; + + if (out->ptX != NULL) { + wc_stm32_pka_read_be(out->ptX, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], size); + } + if (out->ptY != NULL) { + wc_stm32_pka_read_be(out->ptY, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], size); + } +} + +HAL_StatusTypeDef HAL_PKA_ECDSAVerif(PKA_HandleTypeDef *hpkah, + PKA_ECDSAVerifInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + RAM[PKA_ECDSA_VERIF_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_VERIF_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_VERIF_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_A_COEFF], + in->coef, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + 
wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_INITIAL_POINT_X], + in->basePointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_INITIAL_POINT_X + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y], + in->basePointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X], + in->pPubKeyCurvePtX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y], + in->pPubKeyCurvePtY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_SIGNATURE_R], + in->RSign, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_SIGNATURE_R + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_SIGNATURE_S], + in->SSign, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_SIGNATURE_S + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_HASH_E], + in->hash, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_HASH_E + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_ORDER_N], + in->primeOrder, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_ORDER_N + ((in->primeOrderSize + 3U) / 4U)); + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_VERIFICATION); +} + +uint32_t HAL_PKA_ECDSAVerif_IsValidSignature(PKA_HandleTypeDef const *const hpkah) +{ + if (hpkah == NULL || hpkah->Instance == NULL) return 0U; + /* HAL semantic: 
PKA_ECDSA_VERIF_OUT_RESULT == 0 means valid. */ + return (hpkah->Instance->RAM[PKA_ECDSA_VERIF_OUT_RESULT] == 0UL) ? 1U : 0U; +} + +HAL_StatusTypeDef HAL_PKA_ECDSASign(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + HAL_StatusTypeDef st; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + RAM[PKA_ECDSA_SIGN_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_SIGN_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_A_COEFF], + in->coef, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_K], + in->integer, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_K + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_INITIAL_POINT_X], + in->basePointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_INITIAL_POINT_X + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y], + in->basePointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_HASH_E], + in->hash, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_HASH_E + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D], + in->privateKey, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + 
PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_ORDER_N], + in->primeOrder, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_ORDER_N + ((in->primeOrderSize + 3U) / 4U)); + + st = wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_SIGNATURE); + if (st != HAL_OK) { + return st; + } + /* Sign reports failure via PKA_ECDSA_SIGN_OUT_ERROR != 0 (e.g. when + * the random k is unsuitable). The caller is expected to retry with + * a fresh k. */ + if (RAM[PKA_ECDSA_SIGN_OUT_ERROR] != 0UL) { + return HAL_ERROR; + } + return HAL_OK; +} + +void HAL_PKA_ECDSASign_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignOutTypeDef *out, + PKA_ECDSASignOutExtParamTypeDef *outExt) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL) return; + RAM = hpkah->Instance->RAM; + size = (RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] + 7U) / 8U; + + if (out != NULL) { + if (out->RSign != NULL) { + wc_stm32_pka_read_be(out->RSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], size); + } + if (out->SSign != NULL) { + wc_stm32_pka_read_be(out->SSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], size); + } + } + if (outExt != NULL) { + if (outExt->ptX != NULL) { + wc_stm32_pka_read_be(outExt->ptX, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_X], size); + } + if (outExt->ptY != NULL) { + wc_stm32_pka_read_be(outExt->ptY, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_Y], size); + } + } +} + +#endif /* WOLFSSL_STM32_BARE */ + #endif /* WOLFSSL_STM32_PKA */ @@ -103,7 +606,9 @@ extern PKA_HandleTypeDef hpka; #ifndef STM32_HASH_CLOCK_ENABLE static WC_INLINE void wc_Stm32_Hash_Clock_Enable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_ENABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_ENABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, ENABLE); @@ -116,7 +621,9 @@ extern PKA_HandleTypeDef hpka; #ifndef STM32_HASH_CLOCK_DISABLE 
static WC_INLINE void wc_Stm32_Hash_Clock_Disable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_DISABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_DISABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, DISABLE); @@ -224,9 +731,16 @@ static void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) sz = digestSize; while (sz > 0) { - /* first 20 bytes come from instance HR */ + /* first 20 bytes come from the instance digest registers. The CMSIS + * device header for the H5 family renames this from HR[5] to HRA[5] + * (extended HASH IP that adds a separate HASH_DIGEST->HR[16] for the + * full digest); the older F4/F7/L4 layout still uses HR[5]. */ if (i < 5) { + #if defined(WOLFSSL_STM32H5) + digest[i] = HASH->HRA[i]; + #else digest[i] = HASH->HR[i]; + #endif } #ifdef HASH_DIGEST /* reset comes from HASH_DIGEST */ @@ -643,7 +1157,820 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #ifdef STM32_CRYPTO #ifndef NO_AES -#ifdef WOLFSSL_STM32_CUBEMX +#ifdef WOLFSSL_STM32_BARE + +/* ===== Bare-metal direct-register AES driver ===== + * No HAL or StdPeriph. Two IP variants: + * - CRYP (FIFO-based): F2/F4/F7/H7/MP13 + * - AES/SAES (TinyAES): L4/L5/U5/H573/G0/G4/WB/WL/WBA + * Variant selected via family ifdefs below. */ + +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32H7) || \ + defined(WOLFSSL_STM32MP13) +/* ----- CRYP IP (FIFO-based) ----- */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* DATATYPE = 10b (byte) so CRYP byte-swaps DR/DOUT for us; key/IV regs are + * still big-endian. Key arrives pre-reversed via wc_AesSetKey (aes.c:4161); + * IV is byte-reversed locally before write. 
*/ +#define STM32_CRYP_DATATYPE_BYTE CRYP_CR_DATATYPE_1 + +static int Stm32AesWaitBusy(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_BUSY) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitInNotFull(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_IFNF) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitOutNotEmpty(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_OFNE) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 24) { + return CRYP_CR_KEYSIZE_0; /* 192-bit */ + } + if (keyLen == 32) { + return CRYP_CR_KEYSIZE_1; /* 256-bit */ + } + return 0; /* 128-bit */ +} + +/* aes->key is pre-byte-reversed by wc_AesSetKey under BARE (aes.c:4161), + * so the key words go straight into the K registers in big-endian form. */ +static void Stm32AesLoadKey(const word32* key, word32 keyLen) +{ + if (keyLen == 16) { + CRYP->K2LR = key[0]; CRYP->K2RR = key[1]; + CRYP->K3LR = key[2]; CRYP->K3RR = key[3]; + } + else if (keyLen == 24) { + CRYP->K1LR = key[0]; CRYP->K1RR = key[1]; + CRYP->K2LR = key[2]; CRYP->K2RR = key[3]; + CRYP->K3LR = key[4]; CRYP->K3RR = key[5]; + } + else { /* 32 */ + CRYP->K0LR = key[0]; CRYP->K0RR = key[1]; + CRYP->K1LR = key[2]; CRYP->K1RR = key[3]; + CRYP->K2LR = key[4]; CRYP->K2RR = key[5]; + CRYP->K3LR = key[6]; CRYP->K3RR = key[7]; + } +} + +/* aes->reg (IV) is NOT pre-reversed by wc_AesSetIV, so byte-reverse here so + * the IV registers see big-endian words. */ +static void Stm32AesLoadIV(const byte* iv, word32 ivLen) +{ + word32 v[4]; + word32 copyLen = (ivLen > 16) ? 
16 : ivLen; + + XMEMSET(v, 0, sizeof(v)); + if (iv != NULL && copyLen > 0) { + XMEMCPY(v, iv, copyLen); + ByteReverseWords(v, v, 16); + } + CRYP->IV0LR = v[0]; CRYP->IV0RR = v[1]; + CRYP->IV1LR = v[2]; CRYP->IV1RR = v[3]; +} + +/* Push 4 input words then drain 4 output words. */ +static int Stm32AesXferBlock(const byte* in, byte* out) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + /* Local word-aligned copy so callers may pass byte-aligned ptrs. */ + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) { + return ret; + } + buf[i] = CRYP->DOUT; + } + XMEMCPY(out, buf, WC_AES_BLOCK_SIZE); + return 0; +} + +/* CBC/ECB decrypt requires a key-prep pass first (per F4/H7 reference manual: + * load key, run ALGOMODE=AES_KEY, wait BUSY=0, then start the actual op). */ +static int Stm32AesPrepareKey(word32 keyLen) +{ + int ret; + + CRYP->CR = CRYP_CR_ALGOMODE_AES_KEY | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR |= CRYP_CR_CRYPEN; + ret = Stm32AesWaitBusy(); + CRYP->CR &= ~CRYP_CR_CRYPEN; + return ret; +} + +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + WC_STM32_AES_CLK_ENABLE(); + + Stm32AesLoadKey(aes->key, keyLen); + if (!isEnc) { + ret = Stm32AesPrepareKey(keyLen); + if (ret != 0) { + goto exit; + } + } + + cr = CRYP_CR_ALGOMODE_AES_ECB | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + if (!isEnc) { + cr |= 
CRYP_CR_ALGODIR; + } + CRYP->CR = cr; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + WC_STM32_AES_CLK_ENABLE(); + + Stm32AesLoadKey(aes->key, keyLen); + if (!isEnc) { + ret = Stm32AesPrepareKey(keyLen); + if (ret != 0) { + goto exit; + } + } + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + + cr = CRYP_CR_ALGOMODE_AES_CBC | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + if (!isEnc) { + cr |= CRYP_CR_ALGODIR; + } + CRYP->CR = cr; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + + if (ret == 0) { + /* Update aes->reg with new IV (last cipher block for enc; last input + * cipher block for dec). aes.c CBC dispatcher expects aes->reg + * updated for the next call. 
*/ + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, in + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + } + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* CTR: handled via the ECB-as-transform path in aes.c (XTRANSFORM_AESCTRBLOCK). + * Each per-block ECB call comes through wc_Stm32_Aes_Ecb above; aes.c manages + * the counter and the XOR with plaintext. */ + +/* === HW GCM (CRYP IP phase machine) =========================================== + * Native HW GCM for the case the CRYP IP supports directly: + * - IV is 96 bits (12 bytes) -- the standard GCM IV + * - AAD and PT lengths are whole 16-byte blocks (no partial last block) + * Returns CRYPTOCB_UNAVAILABLE for unsupported parameter combos, so the + * caller (aes.c BARE GCM dispatcher) falls back to SW GHASH + HW ECB. */ +static int Stm32AesXferDiscardOut(const byte* in) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + return Stm32AesWaitBusy(); +} + +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + int ret, t; + word32 keyLen, b, blocks, i; + word32 cr_base; + word32 ivBuf[4]; + word32 hwTag[4]; + word64 aadBits, ptBits; + word32 aadBitsHi, aadBitsLo, ptBitsHi, ptBitsLo; + + /* Argument and capability check */ + if (aes == NULL || iv == NULL || tag == NULL) { + return BAD_FUNC_ARG; + } + if (sz > 0 && (in == NULL || out == NULL)) { + return BAD_FUNC_ARG; + } + /* HW only supports 12-byte IV (J0 = IV || 0x00000001 form) */ + if (ivSz != GCM_NONCE_MID_SZ) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (ivSz=%u not 12)\n", ivSz); + #endif 
+ return CRYPTOCB_UNAVAILABLE; + } + /* CRYP IP v1 (F4) cannot natively handle partial last blocks. Force + * SW fallback for those cases. AAD partial is handled by HW (we pad + * with zeros and the GHASH is correct because GHASH only uses bitlen). + * PT partial would produce wrong CT bytes -- bail out to SW. */ + if (sz % WC_AES_BLOCK_SIZE != 0) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (sz=%u not whole-block)\n", sz); + #endif + return CRYPTOCB_UNAVAILABLE; + } +#ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> HW (sz=%u aadSz=%u)\n", sz, aadSz); +#endif + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Set CR (ALGOMODE=AES-GCM, DATATYPE, KEYSIZE, phase=init) BEFORE + * loading key/IV. H7 reference HAL sets ALGOMODE first, then writes + * the K and IV registers. Doing it in the other order on H7 produces + * a wrong tag even though CT comes out right. */ + cr_base = CRYP_CR_ALGOMODE_AES_GCM | STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR = cr_base | (0u << CRYP_CR_GCM_CCMPH_Pos); + + /* Load key (already pre-reversed by wc_AesSetKey under BARE). */ + Stm32AesLoadKey(aes->key, keyLen); + + /* Build the IV register value. CRYP expects the 12-byte IV concatenated + * with the initial 32-bit counter = 0x00000002 (HW pre-increments to 2 + * for the first payload block; at GCMPH=00 init phase HW sets up J0). */ + XMEMSET(ivBuf, 0, 16); + XMEMCPY(ivBuf, iv, 12); + ((byte*)ivBuf)[15] = 0x02; + ByteReverseWords(ivBuf, ivBuf, 16); + CRYP->IV0LR = ivBuf[0]; CRYP->IV0RR = ivBuf[1]; + CRYP->IV1LR = ivBuf[2]; CRYP->IV1RR = ivBuf[3]; + + /* === Phase 1: Init (GCMPH=00) === + * Enable CRYP and wait for CRYPEN to auto-clear -- this is the H7- + * documented mechanism for end-of-init-phase. F4 also auto-clears + * CRYPEN after init phase, so the same wait works on both IPs. 
*/ + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + t = 0; + while ((CRYP->CR & CRYP_CR_CRYPEN) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto exit; + } + } + + /* === Phase 2: Header / AAD (GCMPH=01) === + * Whole AAD blocks: feed via DIN, no DOUT to read. Partial last AAD + * block: pad with zeros (AES HW absorbs them; GHASH math uses aadSz + * bits in the final phase to truncate). */ + if (aadSz > 0) { + word32 aadBlocks = aadSz / WC_AES_BLOCK_SIZE; + word32 aadPartial = aadSz % WC_AES_BLOCK_SIZE; + + CRYP->CR = cr_base | (1u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < aadBlocks; b++) { + ret = Stm32AesXferDiscardOut(aad + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + goto exit; + } + } + if (aadPartial > 0) { + byte pad[WC_AES_BLOCK_SIZE]; + XMEMSET(pad, 0, sizeof(pad)); + XMEMCPY(pad, aad + aadBlocks * WC_AES_BLOCK_SIZE, aadPartial); + ret = Stm32AesXferDiscardOut(pad); + if (ret != 0) { + goto exit; + } + } + ret = Stm32AesWaitBusy(); + if (ret != 0) { + goto exit; + } + CRYP->CR &= ~CRYP_CR_CRYPEN; + } + + /* === Phase 3: Payload (GCMPH=10) === */ + if (sz > 0) { + blocks = sz / WC_AES_BLOCK_SIZE; + CRYP->CR = cr_base | (2u << CRYP_CR_GCM_CCMPH_Pos); + if (!isEnc) { + CRYP->CR |= CRYP_CR_ALGODIR; + } + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + goto exit; + } + } + ret = Stm32AesWaitBusy(); + if (ret != 0) { + goto exit; + } + CRYP->CR &= ~CRYP_CR_CRYPEN; + } + + /* === Phase 4: Final (GCMPH=11) === + * Feed 64-bit AAD-bit-len then 64-bit PT-bit-len, then read 4 DOUT + * words for the tag. + * + * H7 rev.B+ / MP13 (CRYP_VER_2_2): DIN final-phase writes are taken + * "normally" (HW does the DATATYPE swap). Write plain uint32s. 
+ * + * F2/F4/F7 (older CRYP IP, behaves like H7 rev.A): DATATYPE swap + * does NOT apply to the final-phase length block; SW must pre-swap + * via __REV (byte-reverse the 32-bit value). The two HAL families + * disagree on this and so do their reference drivers -- match each. + */ + aadBits = (word64)aadSz * 8u; + ptBits = (word64)sz * 8u; + aadBitsHi = (word32)(aadBits >> 32); + aadBitsLo = (word32)aadBits; + ptBitsHi = (word32)(ptBits >> 32); + ptBitsLo = (word32)ptBits; +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) + aadBitsHi = __REV(aadBitsHi); + aadBitsLo = __REV(aadBitsLo); + ptBitsHi = __REV(ptBitsHi); + ptBitsLo = __REV(ptBitsLo); +#endif + + CRYP->CR = cr_base | (3u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = aadBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = aadBitsLo; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = ptBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = ptBitsLo; + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) { + goto exit; + } + hwTag[i] = CRYP->DOUT; + } + XMEMCPY(tag, hwTag, tagSz < 16 ? tagSz : 16); + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +#else /* TinyAES IP (L4/L5/U5/H5/H573/G0/G4/WB/WL/WBA) */ + +/* ----- TinyAES IP (single-register, polled) ----- + * Different from CRYP: no FIFO; one DINR / DOUTR pair processed per + * 16-byte block. KEYRx are written in *reversed* word order + * (KEYR3 = MSB key word for 128-bit; KEYR7 = MSB for 256-bit). + * AES-192 not supported by hardware (only 128 and 256). */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* CCF (computation-complete flag) clear: newer TinyAES IPs (U3/U5/L4/L5/H5 + * etc.) have a dedicated AES_ICR register with a CCF bit. 
Older TinyAES IPs + * (WB/WL/G0) don't -- CCF is cleared by writing 1 to AES_CR.CCFC. */ +#if defined(AES_ICR_CCF) + #define STM32_AES_CLEAR_CCF() do { CRYP->ICR = AES_ICR_CCF; } while (0) +#elif defined(AES_CR_CCFC) + #define STM32_AES_CLEAR_CCF() do { CRYP->CR |= AES_CR_CCFC; } while (0) +#else + #error "STM32 AES IP variant: no CCF-clear mechanism known" +#endif + +#define STM32_AES_DATATYPE_BYTE AES_CR_DATATYPE_1 /* 0b10 */ +#define STM32_AES_CHMOD_ECB 0u +#define STM32_AES_CHMOD_CBC AES_CR_CHMOD_0 +#define STM32_AES_CHMOD_CTR AES_CR_CHMOD_1 +#define STM32_AES_CHMOD_GCM (AES_CR_CHMOD_0 | AES_CR_CHMOD_1) +#define STM32_AES_MODE_ENC 0u +#define STM32_AES_MODE_KEYDERIVE AES_CR_MODE_0 +#define STM32_AES_MODE_DEC AES_CR_MODE_1 +#define STM32_AES_MODE_KD_DEC (AES_CR_MODE_0 | AES_CR_MODE_1) + +static int Stm32AesWaitCCF(void) +{ + int t = 0; + while ((CRYP->SR & AES_SR_CCF) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE AES] CCF timeout: SR=0x%08lx CR=0x%08lx\n", + (unsigned long)CRYP->SR, (unsigned long)CRYP->CR); + #endif + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 32) { + return AES_CR_KEYSIZE; /* 256-bit */ + } + return 0; /* 128-bit (192 not supported by HW) */ +} + +/* Key registers must be written in increasing register order (KEYR0 first + * per RM). KEYR(N-1) holds the MSB word. + * aes->key arrives pre-byte-reversed (per wc_AesSetKey under BARE), so the + * highest-significance byte of the AES key is in the MSB of aes->key[0]. 
*/ +static int Stm32AesLoadKey(const word32* key, word32 keyLen) +{ + if (keyLen == 16) { + CRYP->KEYR0 = key[3]; + CRYP->KEYR1 = key[2]; + CRYP->KEYR2 = key[1]; + CRYP->KEYR3 = key[0]; + return 0; + } + if (keyLen == 32) { + CRYP->KEYR0 = key[7]; + CRYP->KEYR1 = key[6]; + CRYP->KEYR2 = key[5]; + CRYP->KEYR3 = key[4]; + CRYP->KEYR4 = key[3]; + CRYP->KEYR5 = key[2]; + CRYP->KEYR6 = key[1]; + CRYP->KEYR7 = key[0]; + return 0; + } + /* AES-192 not supported by TinyAES hardware */ + return BAD_FUNC_ARG; +} + +static void Stm32AesLoadIV(const byte* iv, word32 ivLen) +{ + word32 v[4]; + word32 copyLen = (ivLen > 16) ? 16 : ivLen; + + XMEMSET(v, 0, sizeof(v)); + if (iv != NULL && copyLen > 0) { + XMEMCPY(v, iv, copyLen); + ByteReverseWords(v, v, 16); + } + /* IVRx ordering matches keyword: IVR3 = MSB */ + CRYP->IVR3 = v[0]; + CRYP->IVR2 = v[1]; + CRYP->IVR1 = v[2]; + CRYP->IVR0 = v[3]; +} + +/* One 16-byte block in / out. */ +static int Stm32AesXferBlock(const byte* in, byte* out) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + CRYP->DINR = buf[i]; + } + ret = Stm32AesWaitCCF(); + if (ret != 0) { + return ret; + } + for (i = 0; i < 4; i++) { + buf[i] = CRYP->DOUTR; + } + XMEMCPY(out, buf, WC_AES_BLOCK_SIZE); + /* Clear CCF for next block */ + STM32_AES_CLEAR_CCF(); + return 0; +} + +/* Run the key-derivation pass before decrypt (CBC/ECB). 
*/ +static int Stm32AesPrepareKey(word32 keyLen, word32 chmod) +{ + int ret; + word32 cr = STM32_AES_MODE_KEYDERIVE | STM32_AES_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen) | chmod; + CRYP->CR = cr; + CRYP->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + CRYP->CR &= ~AES_CR_EN; + return ret; +} + +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; /* TinyAES has no 192-bit support */ + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Disable AES first; configure CR (with KEYSIZE) BEFORE writing the + * key registers (per RM the AES must know the key size before keys + * are loaded). Then enable. */ + CRYP->CR = 0; + + cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + STM32_AES_CHMOD_ECB | + (isEnc ? 
STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + CRYP->CR = cr; + STM32_AES_CLEAR_CCF(); /* clear any stale CCF/ERR */ + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) { + goto exit; + } + if (!isEnc) { + /* Key derivation pass: temporarily set MODE=01 (key derive) */ + CRYP->CR = (cr & ~AES_CR_MODE_Msk) | STM32_AES_MODE_KEYDERIVE; + CRYP->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + CRYP->CR &= ~AES_CR_EN; + if (ret != 0) { + goto exit; + } + /* Restore decrypt mode */ + CRYP->CR = cr; + } + + CRYP->CR |= AES_CR_EN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + +exit: + CRYP->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Configure CR (with KEYSIZE) BEFORE loading keys -- per RM. */ + CRYP->CR = 0; + + cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + STM32_AES_CHMOD_CBC | + (isEnc ? 
STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + CRYP->CR = cr; + STM32_AES_CLEAR_CCF(); + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) { + goto exit; + } + if (!isEnc) { + /* Key derivation pass for decrypt */ + CRYP->CR = (cr & ~AES_CR_MODE_Msk) | STM32_AES_MODE_KEYDERIVE; + CRYP->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + CRYP->CR &= ~AES_CR_EN; + if (ret != 0) { + goto exit; + } + /* Restore decrypt mode */ + CRYP->CR = cr; + } + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + + CRYP->CR |= AES_CR_EN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + + if (ret == 0) { + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, in + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + } + +exit: + CRYP->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* TinyAES HW GCM: deferred. Falls back to software GCM (with HW ECB + * blocks via wc_AesEncrypt -> wc_Stm32_Aes_Ecb). 
*/ +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + (void)aes; (void)out; (void)in; (void)sz; + (void)iv; (void)ivSz; + (void)tag; (void)tagSz; + (void)aad; (void)aadSz; (void)isEnc; + return CRYPTOCB_UNAVAILABLE; +} + +#endif /* CRYP IP vs TinyAES IP */ + +#elif defined(WOLFSSL_STM32_CUBEMX) #if defined(WOLFSSL_STM32U5_DHUK) /* Set the DHUK IV to be used when unwrapping an AES key @@ -878,7 +2205,7 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit, void wc_Stm32_Aes_Cleanup(void) { } -#endif /* WOLFSSL_STM32_CUBEMX */ +#endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index efc9eaf59a7..6527d7f348b 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -4241,6 +4241,9 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #elif defined(STM32_RNG) /* Generate a RNG seed using the hardware random number generator * on the STM32F2/F4/F7/L4. 
*/ + #include <wolfssl/wolfcrypt/port/st/stm32.h> + /* Pulls in WC_STM32_RNG_CLK_ENABLE for WOLFSSL_STM32_BARE builds */ + #ifdef WOLFSSL_STM32_CUBEMX int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) @@ -4325,7 +4328,12 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #ifndef STM32_NUTTX_RNG /* enable RNG peripheral clock */ - RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #ifdef WC_STM32_RNG_CLK_ENABLE + WC_STM32_RNG_CLK_ENABLE(); + #else + /* Default for F4/F7/L4/L5/U5/H5/H7 -- RNG on AHB2 */ + RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #endif #endif /* enable RNG interrupt, set IE bit in RNG->CR register */ diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 9aa0d418ae1..bc12bd6bae0 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -23,11 +23,278 @@ #define _WOLFPORT_STM32_H_ /* Generic STM32 Hashing and Crypto Functions */ -/* Supports CubeMX HAL or Standard Peripheral Library */ +/* Supports CubeMX HAL, Standard Peripheral Library, or bare-metal direct + * register access (WOLFSSL_STM32_BARE). */ #include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/wolfmath.h> /* for MATH_INT_T */ +#ifdef WOLFSSL_STM32_BARE +/* Per-family direct-register clock-enable macros. CMSIS device header is + * already included via settings.h. RCC->...ENR bit names come from CMSIS. 
*/ +#if defined(WOLFSSL_STM32H5) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32F4) || defined(WOLFSSL_STM32F7) || \ + defined(WOLFSSL_STM32H7) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_CRYPEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_CRYPEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32L4) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3 RCC uses AHB2ENR1 (not AHB2ENR). AES bit only present on + * variants that have the peripheral (U585+, U385+). 
*/ + #ifdef RCC_AHB2ENR1_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_AESEN; (void)RCC->AHB2ENR1; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_AESEN; } while (0) + #endif + #ifdef RCC_AHB2ENR1_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_HASHEN; (void)RCC->AHB2ENR1; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_HASHEN; } while (0) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_RNGEN; (void)RCC->AHB2ENR1; } while (0) +#elif defined(WOLFSSL_STM32G0) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHBENR |= RCC_AHBENR_AESEN; (void)RCC->AHBENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHBENR &= ~RCC_AHBENR_AESEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHBENR |= RCC_AHBENR_RNGEN; (void)RCC->AHBENR; } while (0) +#elif defined(WOLFSSL_STM32WB) + /* WB55 dual-core: AES1 is the M4 (CPU1) application AES, on AHB2. + * AES2 sits on AHB4/AHB3 and is reserved for the M0+ side / shared use. + * The wolfcrypt port maps CRYP -> AES1 (see CRYP alias above), so use + * AES1's clock-enable bit. RNG is on AHB3. */ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AES1EN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AES1EN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_RNGEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32G4) + /* G4: TinyAES + RNG + PKA on AHB2. No HASH peripheral. 
*/ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#endif + +/* Per-family direct-register clock-enable macro for the PKA peripheral. */ +#if defined(WOLFSSL_STM32WB) + /* WB55: PKA clock is on AHB3 */ + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_PKAEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3: AHB2ENR1.PKAEN */ + #ifdef RCC_AHB2ENR1_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_PKAEN; (void)RCC->AHB2ENR1; } while (0) + #endif +#elif defined(WOLFSSL_STM32H5) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32G4) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32WBA) + #ifdef RCC_AHB1ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB1ENR |= RCC_AHB1ENR_PKAEN; (void)RCC->AHB1ENR; } while (0) + #endif +#endif + +/* Bare-metal stand-ins for the small subset of HAL types/values that + * wolfcrypt/src/port/st/stm32.c references in the PKA path. These mirror + * the ST HAL definitions; the bare-metal HAL_PKA_* shims in stm32.c + * implement the actual register sequence. 
*/ +#ifdef WOLFSSL_STM32_PKA + +#include <stdint.h> + +typedef enum { + HAL_OK = 0x00U, + HAL_ERROR = 0x01U, + HAL_BUSY = 0x02U, + HAL_TIMEOUT = 0x03U +} HAL_StatusTypeDef; + +#ifndef HAL_MAX_DELAY +#define HAL_MAX_DELAY 0xFFFFFFFFU +#endif + +typedef struct { + PKA_TypeDef *Instance; +} PKA_HandleTypeDef; + +typedef struct { + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coefA; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *primeOrder; /* V2 only */ + uint32_t scalarMulSize; + const uint8_t *scalarMul; + const uint8_t *pointX; + const uint8_t *pointY; +} PKA_ECCMulInTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECCMulOutTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *pPubKeyCurvePtX; + const uint8_t *pPubKeyCurvePtY; + const uint8_t *RSign; + const uint8_t *SSign; + const uint8_t *hash; +} PKA_ECDSAVerifInTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *hash; + const uint8_t *integer; + const uint8_t *privateKey; +} PKA_ECDSASignInTypeDef; + +typedef struct { + uint8_t *RSign; + uint8_t *SSign; +} PKA_ECDSASignOutTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECDSASignOutExtParamTypeDef; + +#endif /* WOLFSSL_STM32_PKA */ + +/* HAL-legacy macros that the existing direct-register HASH path depends on. + * Without HAL these aren't otherwise visible. 
*/ +#if defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32MP13) || \ + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H7S) || \ + defined(WOLFSSL_STM32U3) + /* New-generation HASH IP: 4-bit ALGO field at bits 20:17 */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #define HASH_ALGOSELECTION_SHA384 (HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1 | \ + HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_224 (HASH_CR_ALGO_0 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_256 (HASH_CR_ALGO_1 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) +#else + /* Older HASH IP (F4/F7/L4 family) ALGO bit mapping (per HAL): + * SHA1 = 0 + * MD5 = ALGO_0 + * SHA224 = ALGO_1 + * SHA256 = ALGO_0 | ALGO_1 + */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_MD5 HASH_CR_ALGO_0 + #ifdef HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #endif +#endif + +/* Legacy CamelCase aliases */ +#ifdef HASH_ALGOSELECTION_SHA1 + #define HASH_AlgoSelection_SHA1 HASH_ALGOSELECTION_SHA1 +#endif +#ifdef HASH_ALGOSELECTION_SHA224 + #define HASH_AlgoSelection_SHA224 HASH_ALGOSELECTION_SHA224 +#endif +#ifdef HASH_ALGOSELECTION_SHA256 + #define HASH_AlgoSelection_SHA256 HASH_ALGOSELECTION_SHA256 +#endif +#ifdef HASH_ALGOSELECTION_SHA384 + #define HASH_AlgoSelection_SHA384 HASH_ALGOSELECTION_SHA384 +#endif +#ifdef HASH_ALGOSELECTION_SHA512 + #define HASH_AlgoSelection_SHA512 HASH_ALGOSELECTION_SHA512 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_224 + #define HASH_AlgoSelection_SHA512_224 HASH_ALGOSELECTION_SHA512_224 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_256 + #define HASH_AlgoSelection_SHA512_256 HASH_ALGOSELECTION_SHA512_256 +#endif +#ifdef HASH_ALGOSELECTION_MD5 + #define HASH_AlgoSelection_MD5 
HASH_ALGOSELECTION_MD5 +#endif + +#define HASH_ALGOMODE_HASH 0u +#ifdef HASH_CR_MODE + #define HASH_ALGOMODE_HMAC HASH_CR_MODE +#endif +/* Byte-stream input (auto byte-swap) */ +#ifdef HASH_CR_DATATYPE_1 + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_1 +#elif defined(HASH_CR_DATATYPE_0) + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_0 +#endif + +#endif /* WOLFSSL_STM32_BARE */ + + #ifdef STM32_HASH #include /* for uint32_t */ @@ -38,7 +305,8 @@ /* The HASH_DIGEST register indicates SHA224/SHA256 support */ #define STM32_HASH_SHA2 #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) #define HASH_CR_SIZE 103 #define HASH_MAX_DIGEST 64 /* Up to SHA512 */ @@ -68,7 +336,8 @@ /* These HASH HAL's have no MD5 implementation */ #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) #define STM32_NOMD5 #endif @@ -163,7 +432,8 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #ifndef NO_AES - #if !defined(STM32_CRYPTO_AES_GCM) && (defined(WOLFSSL_STM32F4) || \ + #if !defined(STM32_CRYPTO_AES_GCM) && !defined(WOLFSSL_STM32_BARE) && \ + (defined(WOLFSSL_STM32F4) || \ defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L4) || \ defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ @@ -173,6 +443,9 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, /* Hardware supports AES GCM acceleration */ #define STM32_CRYPTO_AES_GCM #endif + /* Note: under WOLFSSL_STM32_BARE the GCM HW phase machine is not yet + * implemented. GCM falls back to the software path which still uses HW + * AES (via wc_AesEncrypt -> wc_Stm32_Aes_Ecb) for the underlying blocks. 
*/ #if defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32WL) || \ defined(WOLFSSL_STM32WBA) @@ -186,9 +459,11 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ - defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32U5) || \ - defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) #define STM32_CRYPTO_AES_ONLY /* crypto engine only supports AES */ #endif #if defined(WOLFSSL_STM32H5) @@ -234,7 +509,23 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #define STM32_GCM_IV_START 2 struct Aes; - #ifdef WOLFSSL_STM32_CUBEMX + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal direct-register AES driver. ECB and CBC are HW-native; + * CTR is provided automatically via the ECB-as-transform path in + * aes.c (XTRANSFORM_AESCTRBLOCK); GCM is HW-native for the case + * the CRYP IP supports (12-byte IV + whole-block PT) and returns + * CRYPTOCB_UNAVAILABLE otherwise so aes.c can fall back to SW + * GHASH (which still uses HW ECB for the underlying AES blocks). 
*/ + int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, + word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc); + #elif defined(WOLFSSL_STM32_CUBEMX) int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_HandleTypeDef* hcryp, int useSAES); void wc_Stm32_Aes_Cleanup(void); @@ -242,7 +533,7 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_InitTypeDef* cryptInit, CRYP_KeyInitTypeDef* keyInit); void wc_Stm32_Aes_Cleanup(void); - #endif /* WOLFSSL_STM32_CUBEMX */ + #endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index ca74b140a82..dea6bc0cdc9 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -2238,7 +2238,60 @@ extern void uITRON4_free(void *p) ; #define KEIL_INTRINSICS #endif #define NO_OLD_RNGNAME - #ifdef WOLFSSL_STM32_CUBEMX + + #if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_CUBEMX) + #error "WOLFSSL_STM32_BARE and WOLFSSL_STM32_CUBEMX are mutually exclusive" + #endif + /* WOLFSSL_STM32_PKA is now supported under WOLFSSL_STM32_BARE via the + * direct-register PKA driver in wolfcrypt/src/port/st/stm32.c. */ + + #ifdef WOLFSSL_STM32_BARE + /* Direct register access; no HAL or StdPeriph driver. Pull in only the + * CMSIS device header. Existing direct-register HASH path is reused; + * RNG goes through the existing WOLFSSL_STM32_RNG_NOLIB path. 
*/ + #ifndef WOLFSSL_STM32_RNG_NOLIB + #define WOLFSSL_STM32_RNG_NOLIB + #endif + #if defined(WOLFSSL_STM32F1) + #include "stm32f1xx.h" + #elif defined(WOLFSSL_STM32F2) + #include "stm32f2xx.h" + #elif defined(WOLFSSL_STM32F4) + #include "stm32f4xx.h" + #elif defined(WOLFSSL_STM32F7) + #include "stm32f7xx.h" + #elif defined(WOLFSSL_STM32L4) + #include "stm32l4xx.h" + #elif defined(WOLFSSL_STM32L5) + #include "stm32l5xx.h" + #elif defined(WOLFSSL_STM32H7S) + #include "stm32h7rsxx.h" + #elif defined(WOLFSSL_STM32H7) + #include "stm32h7xx.h" + #elif defined(WOLFSSL_STM32WB) + #include "stm32wbxx.h" + #elif defined(WOLFSSL_STM32WL) + #include "stm32wlxx.h" + #elif defined(WOLFSSL_STM32G0) + #include "stm32g0xx.h" + #elif defined(WOLFSSL_STM32G4) + #include "stm32g4xx.h" + #elif defined(WOLFSSL_STM32U5) + #include "stm32u5xx.h" + #elif defined(WOLFSSL_STM32U3) + #include "stm32u3xx.h" + #elif defined(WOLFSSL_STM32H5) + #include "stm32h5xx.h" + #elif defined(WOLFSSL_STM32N6) + #include "stm32n6xx.h" + #elif defined(WOLFSSL_STM32MP13) + #ifndef __ASSEMBLER__ + #include "stm32mp13xx.h" + #endif + #elif defined(WOLFSSL_STM32WBA) + #include "stm32wbaxx.h" + #endif + #elif defined(WOLFSSL_STM32_CUBEMX) #if defined(WOLFSSL_STM32F1) #include "stm32f1xx_hal.h" #elif defined(WOLFSSL_STM32F2)