diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index ea4c8eaaa93..886354cfcc0 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -1110,6 +1110,7 @@ __must_check __ppc64__ __ppc__ __riscv +__riscv_mul __riscv_xlen __s390x__ __sparc diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 4c9bedf6da0..72c224e7ae8 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -75620,7 +75620,9 @@ static void sp_256_get_entry_16_8(sp_point_256* r, r->y[6] = 0; r->y[7] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -76000,7 +76002,9 @@ static void sp_256_get_entry_256_8(sp_point_256* r, r->y[6] = 0; r->y[7] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -93643,7 +93647,9 @@ static void sp_384_get_entry_16_12(sp_point_384* r, r->y[10] = 0; r->y[11] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -94039,7 +94045,9 @@ static void sp_384_get_entry_256_12(sp_point_384* r, r->y[10] = 0; r->y[11] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -120766,7 +120774,9 @@ static void sp_521_get_entry_16_17(sp_point_521* r, r->y[15] = 0; r->y[16] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -121182,7 +121192,9 @@ static void sp_521_get_entry_256_17(sp_point_521* r, r->y[15] = 0; r->y[16] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 14ec8f17d67..ab946469154 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -101072,7 +101072,9 @@ static void sp_256_get_entry_16_8(sp_point_256* r, r->y[6] = 0; r->y[7] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -101452,7 +101454,9 @@ static void sp_256_get_entry_256_8(sp_point_256* r, r->y[6] = 0; r->y[7] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -111495,7 +111499,9 @@ static void sp_384_get_entry_16_12(sp_point_384* r, r->y[10] = 0; r->y[11] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -111891,7 +111897,9 @@ static void sp_384_get_entry_256_12(sp_point_384* r, r->y[10] = 0; r->y[11] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -124705,7 +124713,9 @@ static void sp_521_get_entry_16_17(sp_point_521* r, r->y[15] = 0; r->y[16] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -125121,7 +125131,9 @@ static void sp_521_get_entry_256_17(sp_point_521* r, r->y[15] = 0; r->y[16] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 44910f458cf..05256b3d6c5 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -114,6 +114,71 @@ #ifndef WOLFSSL_SP_ASM #if SP_WORD_SIZE == 32 +#ifdef SP_NO_MUL_INSTRUCTION +sp_uint64 __muldi3(sp_uint64 a, sp_uint64 b); +sp_uint64 __muldi3(sp_uint64 a, sp_uint64 b) +{ + sp_uint64 r; + sp_uint64 am[16]; + + /* if b is negative, convert it to positive and negate a. */ + r = 0 - (b >> 63); + a = a ^ r; + b = b ^ r; + a -= r; + b -= r; + +#if defined(WOLFSSL_SP_SMALL) + int i; + + am[0] = 0; + for (i = 1; i < 16; i++) { + am[i] = am[i-1] + a; + } + + r = am[(b >> 28) & 0xf]; + for (i = 24; i >= 0; i -= 4) { + r <<= 4; + r += am[(b >> i) & 0xf]; + } +#else + am[ 0] = 0; + am[ 1] = a; + am[ 2] = a << 1; + am[ 3] = am[ 2] + a; + am[ 4] = a << 2; + am[ 5] = am[ 4] + a; + am[ 6] = am[ 5] + a; + am[ 7] = am[ 6] + a; + am[ 8] = a << 3; + am[ 9] = am[ 8] + a; + am[10] = am[ 9] + a; + am[11] = am[10] + a; + am[12] = am[11] + a; + am[13] = am[12] + a; + am[14] = am[13] + a; + am[15] = am[14] + a; + + r = am[(b >> 28) & 0xf]; + r <<= 4; + r += am[(b >> 24) & 0xf]; + r <<= 4; + r += am[(b >> 20) & 0xf]; + r <<= 4; + r += am[(b >> 16) & 0xf]; + r <<= 4; + r += am[(b >> 12) & 0xf]; + r <<= 4; + r += am[(b >> 8) & 0xf]; + r <<= 4; + r += am[(b >> 4) & 0xf]; + r <<= 4; + r += am[(b >> 0) & 0xf]; +#endif + + return r; +} +#endif /* SP_NO_MUL_INSTRUCTION */ #define SP_PRINT_NUM(var, name, total, words, bits) \ do { \ int ii; \ @@ -21252,7 +21317,9 @@ static void sp_256_get_entry_256_9(sp_point_256* r, r->y[7] = 0; r->y[8] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -28357,7 +28424,9 @@ static void sp_384_get_entry_256_15(sp_point_384* r, r->y[13] = 0; r->y[14] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -35524,7 +35593,9 @@ static void sp_521_get_entry_256_21(sp_point_521* r, r->y[19] = 0; r->y[20] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index d3c08a73ac7..f4948e02033 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -21818,7 +21818,9 @@ static void sp_256_get_entry_256_5(sp_point_256* r, r->y[3] = 0; r->y[4] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint64)i - (sp_uint64)idx) >> 63) - 1); + sp_digit lte = (sp_digit)((((sp_uint64)idx - (sp_uint64)i) >> 63) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -28358,7 +28360,9 @@ static void sp_384_get_entry_256_7(sp_point_384* r, r->y[5] = 0; r->y[6] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint64)i - (sp_uint64)idx) >> 63) - 1); + sp_digit lte = (sp_digit)((((sp_uint64)idx - (sp_uint64)i) >> 63) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -35371,7 +35375,9 @@ static void sp_521_get_entry_256_9(sp_point_521* r, r->y[7] = 0; r->y[8] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint64)i - (sp_uint64)idx) >> 63) - 1); + sp_digit lte = (sp_digit)((((sp_uint64)idx - (sp_uint64)i) >> 63) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 975ed8b5092..9378e2b7903 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -37006,7 +37006,9 @@ static void sp_256_get_entry_16_8(sp_point_256* r, r->y[6] = 0; r->y[7] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -37386,7 +37388,9 @@ static void sp_256_get_entry_256_8(sp_point_256* r, r->y[6] = 0; r->y[7] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -46949,7 +46953,9 @@ static void sp_384_get_entry_16_12(sp_point_384* r, r->y[10] = 0; r->y[11] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -47345,7 +47351,9 @@ static void sp_384_get_entry_256_12(sp_point_384* r, r->y[10] = 0; r->y[11] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -58780,7 +58788,9 @@ static void sp_521_get_entry_16_17(sp_point_521* r, r->y[15] = 0; r->y[16] = 0; for (i = 1; i < 16; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; @@ -59196,7 +59206,9 @@ static void sp_521_get_entry_256_17(sp_point_521* r, r->y[15] = 0; r->y[16] = 0; for (i = 1; i < 256; i++) { - mask = (sp_digit)0 - (i == idx); + sp_digit gte = (sp_digit)((((sp_uint32)i - (sp_uint32)idx) >> 31) - 1); + sp_digit lte = (sp_digit)((((sp_uint32)idx - (sp_uint32)i) >> 31) - 1); + mask = gte & lte; r->x[0] |= mask & table[i].x[0]; r->x[1] |= mask & table[i].x[1]; r->x[2] |= mask & table[i].x[2]; diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index d7575c33201..3f04d445c26 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -1,6 +1,6 @@ ; /* sp_x86_64_asm.asm */ ; /* -; * Copyright (C) 2006-2026 wolfSSL Inc. +; * Copyright (C) 2006-2026 wolfSSL Inc. ; * ; * This file is part of wolfSSL. ; * diff --git a/wolfssl/wolfcrypt/sp.h b/wolfssl/wolfcrypt/sp.h index c0f028a9b3a..9934a60e2a3 100644 --- a/wolfssl/wolfcrypt/sp.h +++ b/wolfssl/wolfcrypt/sp.h @@ -26,6 +26,10 @@ #include #include +#if defined(__riscv) && (__riscv_xlen == 32) && !defined(__riscv_mul) + #define SP_NO_MUL_INSTRUCTION +#endif + #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \ defined(WOLFSSL_HAVE_SP_ECC) #ifdef _WIN32_WCE