Skip to content

Commit de91add

Browse files
authored
Merge pull request #7182 from SparkiDev/armv8_32_align_chacha20_asm
ARMv8 32 bit ChaCha20 ASM: loading from in/out
2 parents de4a6f9 + 4585c6d commit de91add

1 file changed

Lines changed: 18 additions & 16 deletions

File tree

wolfcrypt/src/port/arm/armv8-chacha.c

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1666,7 +1666,10 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS
16661666
"VADD.I32 q6, q6, q12 \n\t"
16671667
"VADD.I32 q7, q7, q13 \n\t"
16681668

1669-
"VLDM %[m], { q8-q15 } \n\t"
1669+
"VLD1.8 { q8, q9 }, [%[m]]! \n\t"
1670+
"VLD1.8 { q10, q11 }, [%[m]]! \n\t"
1671+
"VLD1.8 { q12, q13 }, [%[m]]! \n\t"
1672+
"VLD1.8 { q14, q15 }, [%[m]]! \n\t"
16701673
"VEOR q0, q0, q8 \n\t"
16711674
"VEOR q1, q1, q9 \n\t"
16721675
"VEOR q2, q2, q10 \n\t"
@@ -1675,7 +1678,10 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS
16751678
"VEOR q5, q5, q13 \n\t"
16761679
"VEOR q6, q6, q14 \n\t"
16771680
"VEOR q7, q7, q15 \n\t"
1678-
"VSTM %[c], { q0-q7 } \n\t"
1681+
"VST1.8 { q0, q1 }, [%[c]]! \n\t"
1682+
"VST1.8 { q2, q3 }, [%[c]]! \n\t"
1683+
"VST1.8 { q4, q5 }, [%[c]]! \n\t"
1684+
"VST1.8 { q6, q7 }, [%[c]]! \n\t"
16791685

16801686
: [c] "+r" (c), [m] "+r" (m)
16811687
: [rounds] "I" (ROUNDS/2), [input] "r" (input),
@@ -2725,14 +2731,14 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
27252731
"CMP %[bytes], #64 \n\t"
27262732
"BLT L_chacha20_arm32_64_lt_64_%= \n\t"
27272733
/* XOR full 64 byte block */
2728-
"VLDM %[m], { q4-q7 } \n\t"
2729-
"ADD %[m], %[m], #64 \n\t"
2734+
"VLD1.8 { q4, q5 }, [%[m]]! \n\t"
2735+
"VLD1.8 { q6, q7 }, [%[m]]! \n\t"
27302736
"VEOR q0, q0, q4 \n\t"
27312737
"VEOR q1, q1, q5 \n\t"
27322738
"VEOR q2, q2, q6 \n\t"
27332739
"VEOR q3, q3, q7 \n\t"
2734-
"VSTM %[c], { q0-q3 } \n\t"
2735-
"ADD %[c], %[c], #64 \n\t"
2740+
"VST1.8 { q0, q1 }, [%[c]]! \n\t"
2741+
"VST1.8 { q2, q3 }, [%[c]]! \n\t"
27362742
"SUBS %[bytes], %[bytes], #64 \n\t"
27372743
"VADD.I32 q11, q11, q14 \n\t"
27382744
"BNE L_chacha20_arm32_64_outer_loop_%= \n\t"
@@ -2743,12 +2749,10 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
27432749
/* XOR 32 bytes */
27442750
"CMP %[bytes], #32 \n\t"
27452751
"BLT L_chacha20_arm32_64_lt_32_%= \n\t"
2746-
"VLDM %[m], { q4-q5 } \n\t"
2747-
"ADD %[m], %[m], #32 \n\t"
2752+
"VLD1.8 { q4, q5 }, [%[m]]! \n\t"
27482753
"VEOR q4, q4, q0 \n\t"
27492754
"VEOR q5, q5, q1 \n\t"
2750-
"VSTM %[c], { q4-q5 } \n\t"
2751-
"ADD %[c], %[c], #32 \n\t"
2755+
"VST1.8 { q4, q5 }, [%[c]]! \n\t"
27522756
"SUBS %[bytes], %[bytes], #32 \n\t"
27532757
"VMOV q0, q2 \n\t"
27542758
"VMOV q1, q3 \n\t"
@@ -2758,11 +2762,9 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
27582762
/* XOR 16 bytes */
27592763
"CMP %[bytes], #16 \n\t"
27602764
"BLT L_chacha20_arm32_64_lt_16_%= \n\t"
2761-
"VLDM %[m], { q4 } \n\t"
2762-
"ADD %[m], %[m], #16 \n\t"
2765+
"VLD1.8 { q4 }, [%[m]]! \n\t"
27632766
"VEOR q4, q4, q0 \n\t"
2764-
"VSTM %[c], { q4 } \n\t"
2765-
"ADD %[c], %[c], #16 \n\t"
2767+
"VST1.8 { q4 }, [%[c]]! \n\t"
27662768
"SUBS %[bytes], %[bytes], #16 \n\t"
27672769
"VMOV q0, q1 \n\t"
27682770
"BEQ L_chacha20_arm32_64_done_%= \n\t"
@@ -2771,9 +2773,9 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
27712773
/* XOR 8 bytes */
27722774
"CMP %[bytes], #8 \n\t"
27732775
"BLT L_chacha20_arm32_64_lt_8_%= \n\t"
2774-
"VLD1.64 { d8 }, [%[m]]! \n\t"
2776+
"VLD1.8 { d8 }, [%[m]]! \n\t"
27752777
"VEOR d8, d8, d0 \n\t"
2776-
"VST1.64 { d8 }, [%[c]]! \n\t"
2778+
"VST1.8 { d8 }, [%[c]]! \n\t"
27772779
"SUBS %[bytes], %[bytes], #8 \n\t"
27782780
"VMOV d0, d1 \n\t"
27792781
"BEQ L_chacha20_arm32_64_done_%= \n\t"

0 commit comments

Comments
 (0)