Skip to content

Commit ad8eb76

Browse files
authored
Merge pull request #8540 from douzzer/20250307-misc-xorbuf-optimizer
20250307-misc-xorbuf-optimizer
2 parents c3f2456 + a84831c commit ad8eb76

3 files changed

Lines changed: 116 additions & 63 deletions

File tree

configure.ac

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10064,6 +10064,13 @@ if test "x$ENABLED_LINUXKM" = "xyes"; then
1006410064
fi
1006510065
fi
1006610066
10067+
AS_IF([test "$ENABLED_ASM" = "no" && (test "$ENABLED_INTELASM" != "no" || \
10068+
test "$ENABLED_AESNI" != "no" || \
10069+
test "$ENABLED_ARMASM" != "no" || \
10070+
test "$ENABLED_RISCV_ASM" != "no" || \
10071+
test "$ENABLED_SP_ASM" != "no")],
10072+
[AC_MSG_WARN([Conflicting asm settings.])])
10073+
1006710074
# The following AM_CONDITIONAL statements set flags for use in the Makefiles.
1006810075
# Some of these affect build targets and objects, some trigger different
1006910076
# test scripts for make check.

wolfcrypt/src/misc.c

Lines changed: 109 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -407,14 +407,18 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
407407
#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
408408

409409
#ifndef WOLFSSL_NO_XOR_OPS
410+
411+
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
412+
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
413+
410414
/* This routine performs a bitwise XOR operation of <*a> and <*b> for <n> number
411415
of wolfssl_words, placing the result in <*r>. */
412416
WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r,
413417
const wolfssl_word** a, const wolfssl_word** b, word32 n)
414418
{
415-
word32 i;
419+
const wolfssl_word *e = *a + n;
416420

417-
for (i = 0; i < n; i++)
421+
while (*a < e)
418422
*((*r)++) = *((*a)++) ^ *((*b)++);
419423
}
420424

@@ -424,58 +428,78 @@ counts, placing the result in <*buf>. */
424428
WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf,
425429
const void* mask, word32 count)
426430
{
427-
word32 i;
428-
byte* o;
429-
const byte* b;
430-
const byte* m;
431-
432-
o = (byte*)out;
433-
b = (const byte*)buf;
434-
m = (const byte*)mask;
435-
436-
437-
if (((wc_ptr_t)o) % WOLFSSL_WORD_SIZE ==
438-
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE &&
439-
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE ==
440-
((wc_ptr_t)m) % WOLFSSL_WORD_SIZE) {
441-
/* type-punning helpers */
442-
union {
443-
byte* bp;
444-
wolfssl_word* wp;
445-
} tpo;
446-
union {
447-
const byte* bp;
448-
const wolfssl_word* wp;
449-
} tpb, tpm;
450-
/* Alignment checks out. Possible to XOR words. */
451-
/* Move alignment so that it lines up with a
452-
* WOLFSSL_WORD_SIZE boundary */
453-
while (((wc_ptr_t)b) % WOLFSSL_WORD_SIZE != 0 && count > 0) {
454-
*(o++) = (byte)(*(b++) ^ *(m++));
431+
byte* o = (byte*)out;
432+
const byte* b = (const byte*)buf;
433+
const byte* m = (const byte*)mask;
434+
435+
/* type-punning helpers */
436+
union {
437+
byte* bp;
438+
wolfssl_word* wp;
439+
} tpo;
440+
union {
441+
const byte* bp;
442+
const wolfssl_word* wp;
443+
} tpb, tpm;
444+
445+
if (((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
446+
((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
447+
((((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)) == 0))
448+
{
449+
/* All buffers are already aligned. Possible to XOR by words without
450+
* fixup.
451+
*/
452+
453+
tpo.bp = o;
454+
tpb.bp = b;
455+
tpm.bp = m;
456+
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
457+
o = tpo.bp;
458+
b = tpb.bp;
459+
m = tpm.bp;
460+
count &= (WOLFSSL_WORD_SIZE - 1);
461+
}
462+
else if ((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) ==
463+
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) &&
464+
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
465+
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
466+
{
467+
/* Alignment can be fixed up to allow XOR by words. */
468+
469+
/* Perform bytewise xor until pointers are aligned to
470+
* WOLFSSL_WORD_SIZE.
471+
*/
472+
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
473+
{
474+
*o++ = (byte)(*b++ ^ *m++);
455475
count--;
456476
}
477+
457478
tpo.bp = o;
458479
tpb.bp = b;
459480
tpm.bp = m;
460-
XorWordsOut( &tpo.wp, &tpb.wp, &tpm.wp, count / WOLFSSL_WORD_SIZE);
481+
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
461482
o = tpo.bp;
462483
b = tpb.bp;
463484
m = tpm.bp;
464-
count %= WOLFSSL_WORD_SIZE;
485+
count &= (WOLFSSL_WORD_SIZE - 1);
486+
}
487+
488+
while (count > 0) {
489+
*o++ = (byte)(*b++ ^ *m++);
490+
count--;
465491
}
466492

467-
for (i = 0; i < count; i++)
468-
o[i] = (byte)(b[i] ^ m[i]);
469493
}
470494

471495
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
472496
of wolfssl_words, placing the result in <*r>. */
473497
WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a,
474498
word32 n)
475499
{
476-
word32 i;
500+
const wolfssl_word *e = *a + n;
477501

478-
for (i = 0; i < n; i++)
502+
while (*a < e)
479503
*((*r)++) ^= *((*a)++);
480504
}
481505

@@ -484,36 +508,55 @@ counts, placing the result in <*buf>. */
484508

485509
WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
486510
{
487-
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
488-
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
489-
490-
word32 i;
491-
byte* b;
492-
const byte* m;
493-
494-
b = (byte*)buf;
495-
m = (const byte*)mask;
511+
byte* b = (byte*)buf;
512+
const byte* m = (const byte*)mask;
513+
514+
/* type-punning helpers */
515+
union {
516+
byte* bp;
517+
wolfssl_word* wp;
518+
} tpb;
519+
union {
520+
const byte* bp;
521+
const wolfssl_word* wp;
522+
} tpm;
523+
524+
if ((((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
525+
(((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)) == 0))
526+
{
527+
/* Both buffers are already aligned. Possible to XOR by words without
528+
* fixup.
529+
*/
496530

497-
if ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
498-
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
531+
tpb.bp = b;
532+
tpm.bp = m;
533+
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
534+
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
535+
PRAGMA_GCC_DIAG_PUSH;
536+
PRAGMA_GCC("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
537+
#endif
538+
XorWords(&tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
539+
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
540+
PRAGMA_GCC_DIAG_POP;
541+
#endif
542+
b = tpb.bp;
543+
m = tpm.bp;
544+
count &= (WOLFSSL_WORD_SIZE - 1);
545+
}
546+
else if (((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) ==
547+
((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)))
499548
{
500-
/* type-punning helpers */
501-
union {
502-
byte* bp;
503-
wolfssl_word* wp;
504-
} tpb;
505-
union {
506-
const byte* bp;
507-
const wolfssl_word* wp;
508-
} tpm;
509-
/* Alignment checks out. Possible to XOR words. */
510-
/* Move alignment so that it lines up with a
511-
* WOLFSSL_WORD_SIZE boundary */
512-
while ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) != 0 && count > 0)
549+
/* Alignment can be fixed up to allow XOR by words. */
550+
551+
/* Perform bytewise xor until pointers are aligned to
552+
* WOLFSSL_WORD_SIZE.
553+
*/
554+
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
513555
{
514556
*(b++) ^= *(m++);
515557
count--;
516558
}
559+
517560
tpb.bp = b;
518561
tpm.bp = m;
519562
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
@@ -530,10 +573,13 @@ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
530573
count &= (WOLFSSL_WORD_SIZE - 1);
531574
}
532575

533-
for (i = 0; i < count; i++)
534-
b[i] ^= m[i];
576+
while (count > 0) {
577+
*b++ ^= *m++;
578+
count--;
579+
}
535580
}
536-
#endif
581+
582+
#endif /* !WOLFSSL_NO_XOR_OPS */
537583

538584
#ifndef WOLFSSL_NO_FORCE_ZERO
539585
/* This routine fills the first len bytes of the memory area pointed by mem

0 commit comments

Comments
 (0)