Skip to content

Commit 146cfe4

Browse files
authored
Set-wise knight/bishop/rook attack generation (AVX2) (#914)
Ported #909 to AVX2.

VSTC (AVX2)
Elo   | 3.01 +- 2.06 (95%)
SPRT  | 4.0+0.04s Threads=1 Hash=16MB
LLR   | 3.13 (-2.25, 2.89) [0.00, 3.00]
Games | N: 28872 W: 7605 L: 7355 D: 13912
Penta | [133, 3123, 7696, 3329, 155]
https://recklesschess.space/test/13611/

No functional change.

Bench: 3344311
1 parent a7e1675 commit 146cfe4

1 file changed

Lines changed: 41 additions & 28 deletions

File tree

src/setwise.rs

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ pub fn knight_attacks_setwise(bb: Bitboard) -> Bitboard {
6262
}
6363
}
6464

65-
#[cfg(not(target_feature = "avx512f"))]
65+
#[cfg(not(target_feature = "avx2"))]
6666
#[inline]
6767
pub fn bishop_attacks_setwise(bb: Bitboard, occupancies: Bitboard) -> Bitboard {
6868
use crate::lookup::bishop_attacks;
@@ -74,33 +74,28 @@ pub fn bishop_attacks_setwise(bb: Bitboard, occupancies: Bitboard) -> Bitboard {
7474
result
7575
}
7676

// Diff view of the SIMD set-wise bishop attack generator. Lines that follow an `NN-` marker are
// the removed rotate-based code (`_mm256_rolv_epi64` with runtime rotate vectors); lines that
// follow an `NN+` marker are the replacement built on `shiftv` with compile-time shift amounts.
//
// Each of the four 64-bit lanes handles one diagonal step (-9, -7, 7, 9), paired lane-for-lane
// with the edge mask in the same position of `mask`.
// NOTE(review): R1/R8/A/H are presumably the rank-1/rank-8/file-A/file-H bitboards — confirm.
77-
#[cfg(target_feature = "avx512f")]
77+
#[cfg(target_feature = "avx2")]
7878
#[inline]
7979
pub fn bishop_attacks_setwise(bb: Bitboard, occupancies: Bitboard) -> Bitboard {
8080
use std::arch::x86_64::*;
8181

8282
// SAFETY: in the new version this function only exists when compiled with the `avx2` target
// feature (see the `NN+` cfg above).
unsafe {
83-
let attackers = _mm256_set1_epi64x(bb.0 as i64);
84-
let rotates1 = _mm256_set_epi64x(-9, -7, 7, 9);
85-
let rotates2 = _mm256_add_epi64(rotates1, rotates1);
86-
let rotates4 = _mm256_add_epi64(rotates2, rotates2);
87-
// Per-direction mask; applied to the propagation set and again to the final attack set.
8883
let mask = _mm256_set_epi64x(!(R8 | H).0 as i64, !(R8 | A).0 as i64, !(R1 | H).0 as i64, !(R1 | A).0 as i64);
8984

// `generate` starts as the bishop set broadcast to every lane; `propagate` is the set of
// unoccupied squares restricted by `mask`.
90-
let generate = attackers;
85+
let generate = _mm256_set1_epi64x(bb.0 as i64);
9186
let propagate = _mm256_and_si256(_mm256_set1_epi64x(!occupancies.0 as i64), mask);
// Three doubling rounds (1, 2, then 4 steps): extend `generate` through `propagate`, then
// widen `propagate` for the next round. A final single step, masked again, yields `attacks`.
92-
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, _mm256_rolv_epi64(generate, rotates1)));
93-
let propagate = _mm256_and_si256(propagate, _mm256_rolv_epi64(propagate, rotates1));
94-
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, _mm256_rolv_epi64(generate, rotates2)));
95-
let propagate = _mm256_and_si256(propagate, _mm256_rolv_epi64(propagate, rotates2));
96-
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, _mm256_rolv_epi64(generate, rotates4)));
97-
let attacks = _mm256_and_si256(_mm256_rolv_epi64(generate, rotates1), mask);
87+
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, shiftv::<-9, -7, 7, 9>(generate)));
88+
let propagate = _mm256_and_si256(propagate, shiftv::<-9, -7, 7, 9>(propagate));
89+
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, shiftv::<-18, -14, 14, 18>(generate)));
90+
let propagate = _mm256_and_si256(propagate, shiftv::<-18, -14, 14, 18>(propagate));
91+
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, shiftv::<-36, -28, 28, 36>(generate)));
92+
let attacks = _mm256_and_si256(shiftv::<-9, -7, 7, 9>(generate), mask);
9893

// NOTE(review): `fold_to_bitboard`'s body is not visible here — presumably it combines the
// four direction lanes into a single `Bitboard`; confirm.
9994
fold_to_bitboard(attacks)
10095
}
10196
}
10297

103-
#[cfg(not(target_feature = "avx512f"))]
98+
#[cfg(not(target_feature = "avx2"))]
10499
#[inline]
105100
pub fn rook_attacks_setwise(bb: Bitboard, occupancies: Bitboard) -> Bitboard {
106101
use crate::lookup::rook_attacks;
@@ -112,32 +107,50 @@ pub fn rook_attacks_setwise(bb: Bitboard, occupancies: Bitboard) -> Bitboard {
112107
result
113108
}
114109

// Diff view of the SIMD set-wise rook attack generator. Lines that follow an `NN-` marker are
// the removed rotate-based code (`_mm256_rolv_epi64` with runtime rotate vectors); lines that
// follow an `NN+` marker are the replacement built on `shiftv` with compile-time shift amounts.
//
// Each of the four 64-bit lanes handles one orthogonal step (-8, -1, 1, 8), paired lane-for-lane
// with the edge mask in the same position of `mask` (!R8, !H, !A, !R1).
// NOTE(review): R1/R8/A/H are presumably the rank-1/rank-8/file-A/file-H bitboards — confirm.
115-
#[cfg(target_feature = "avx512f")]
110+
#[cfg(target_feature = "avx2")]
116111
#[inline]
117112
pub fn rook_attacks_setwise(bb: Bitboard, occupancies: Bitboard) -> Bitboard {
118113
use std::arch::x86_64::*;
119114

120115
// SAFETY: in the new version this function only exists when compiled with the `avx2` target
// feature (see the `NN+` cfg above).
unsafe {
121-
let attackers = _mm256_set1_epi64x(bb.0 as i64);
122-
let rotates1 = _mm256_set_epi64x(-8, -1, 1, 8);
123-
let rotates2 = _mm256_add_epi64(rotates1, rotates1);
124-
let rotates4 = _mm256_add_epi64(rotates2, rotates2);
125-
// Per-direction mask; applied to the propagation set and again to the final attack set.
126116
let mask = _mm256_set_epi64x(!R8.0 as i64, !H.0 as i64, !A.0 as i64, !R1.0 as i64);
127117

// `generate` starts as the rook set broadcast to every lane; `propagate` is the set of
// unoccupied squares restricted by `mask`.
128-
let generate = attackers;
118+
let generate = _mm256_set1_epi64x(bb.0 as i64);
129119
let propagate = _mm256_and_si256(_mm256_set1_epi64x(!occupancies.0 as i64), mask);
// Three doubling rounds (1, 2, then 4 steps): extend `generate` through `propagate`, then
// widen `propagate` for the next round. A final single step, masked again, yields `attacks`.
130-
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, _mm256_rolv_epi64(generate, rotates1)));
131-
let propagate = _mm256_and_si256(propagate, _mm256_rolv_epi64(propagate, rotates1));
132-
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, _mm256_rolv_epi64(generate, rotates2)));
133-
let propagate = _mm256_and_si256(propagate, _mm256_rolv_epi64(propagate, rotates2));
134-
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, _mm256_rolv_epi64(generate, rotates4)));
135-
let attacks = _mm256_and_si256(_mm256_rolv_epi64(generate, rotates1), mask);
120+
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, shiftv::<-8, -1, 1, 8>(generate)));
121+
let propagate = _mm256_and_si256(propagate, shiftv::<-8, -1, 1, 8>(propagate));
122+
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, shiftv::<-16, -2, 2, 16>(generate)));
123+
let propagate = _mm256_and_si256(propagate, shiftv::<-16, -2, 2, 16>(propagate));
124+
let generate = _mm256_or_si256(generate, _mm256_and_si256(propagate, shiftv::<-32, -4, 4, 32>(generate)));
125+
let attacks = _mm256_and_si256(shiftv::<-8, -1, 1, 8>(generate), mask);
136126

// NOTE(review): `fold_to_bitboard`'s body is not visible here — presumably it combines the
// four direction lanes into a single `Bitboard`; confirm.
137127
fold_to_bitboard(attacks)
138128
}
139129
}
140130

// AVX2-only variant of `shiftv` (compiled when `avx2` is enabled and `avx512f` is not).
//
// Applies a separate compile-time shift to each 64-bit lane of `vector`: a positive constant
// shifts that lane left by that many bits, a negative constant shifts it right by its magnitude.
// `_mm256_set_epi64x(A, B, C, D)` places `A` in the highest lane and `D` in the lowest.
//
// Bits shifted past either end are dropped (this is a shift, not a rotate).
//
// # Safety
// The caller must ensure AVX2 is available on the executing CPU.
131+
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512f")))]
132+
#[inline]
133+
unsafe fn shiftv<const A: i64, const B: i64, const C: i64, const D: i64>(
134+
vector: core::arch::x86_64::__m256i,
135+
) -> core::arch::x86_64::__m256i {
136+
use core::arch::x86_64::*;
137+
138+
// The variable-shift intrinsics yield zero for any count above 63. A negated count is such an
// out-of-range value when read as unsigned, so for each non-zero lane exactly one of the two
// operands below is non-zero and the OR selects the correctly-directed shift.
_mm256_or_si256(
139+
_mm256_sllv_epi64(vector, _mm256_set_epi64x(A, B, C, D)),
140+
_mm256_srlv_epi64(vector, _mm256_set_epi64x(-A, -B, -C, -D)),
141+
)
142+
}
143+
// AVX-512 variant of `shiftv` (compiled when `avx512f` is enabled).
//
// Rotates each 64-bit lane of `vector` left by the corresponding compile-time constant using a
// single `_mm256_rolv_epi64`; `_mm256_set_epi64x(A, B, C, D)` places `A` in the highest lane and
// `D` in the lowest. The rotate count is taken modulo 64, so a negative constant acts as a
// rotate right by its magnitude.
//
// Unlike a plain shift, bits rotated out of one end re-enter at the other.
// NOTE(review): the visible callers AND the result with an edge mask — presumably that is what
// makes rotate and shift interchangeable for them; confirm.
//
// # Safety
// The caller must ensure the required AVX-512 features are available on the executing CPU.
// NOTE(review): `_mm256_rolv_epi64` is an AVX-512F + AVX-512VL intrinsic, but the cfg below only
// checks `avx512f` — confirm that every supported build target with `avx512f` also has `avx512vl`.
144+
#[cfg(target_feature = "avx512f")]
145+
#[inline]
146+
unsafe fn shiftv<const A: i64, const B: i64, const C: i64, const D: i64>(
147+
vector: core::arch::x86_64::__m256i,
148+
) -> core::arch::x86_64::__m256i {
149+
use core::arch::x86_64::*;
150+
151+
_mm256_rolv_epi64(vector, _mm256_set_epi64x(A, B, C, D))
152+
}
153+
141154
#[cfg(target_feature = "avx2")]
142155
#[inline]
143156
unsafe fn fold_to_bitboard(vector: core::arch::x86_64::__m256i) -> Bitboard {

0 commit comments

Comments
 (0)