Skip to content

Commit b41240d

Browse files
authored
Merge pull request brucefan1983#1160 from brucefan1983/speedup-ewald
Speedup ewald
2 parents 0b7bd20 + 68a3bec commit b41240d

2 files changed

Lines changed: 22 additions & 26 deletions

File tree

src/force/nep_charge.cu

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,19 +1263,17 @@ void NEP_Charge::find_k_and_G(const double* box)
1263 1263
for (int n2 = - n2_max; n2 <= n2_max; ++n2) {
1264 1264
for (int n3 = - n3_max; n3 <= n3_max; ++n3) {
1265 1265
const int nsq = n1 * n1 + n2 * n2 + n3 * n3;
1266-
if (nsq > 0) {
1267-
const float kx = n1 * b1[0] + n2 * b2[0] + n3 * b3[0];
1268-
const float ky = n1 * b1[1] + n2 * b2[1] + n3 * b3[1];
1269-
const float kz = n1 * b1[2] + n2 * b2[2] + n3 * b3[2];
1270-
const float ksq = kx * kx + ky * ky + kz * kz;
1271-
if (ksq < ksq_max) {
1272-
cpu_kx.emplace_back(kx);
1273-
cpu_ky.emplace_back(ky);
1274-
cpu_kz.emplace_back(kz);
1275-
float G = abs(two_pi_over_det) / ksq * exp(-ksq * charge_para.alpha_factor);
1276-
const float symmetry_factor = (n1 > 0) ? 2.0f : 1.0f;
1277-
cpu_G.emplace_back(symmetry_factor * G);
1278-
}
1266+
if (nsq == 0 || (n1 == 0 && n2 < 0) || (n1 == 0 && n2 == 0 && n3 < 0)) continue;
1267+
const float kx = n1 * b1[0] + n2 * b2[0] + n3 * b3[0];
1268+
const float ky = n1 * b1[1] + n2 * b2[1] + n3 * b3[1];
1269+
const float kz = n1 * b1[2] + n2 * b2[2] + n3 * b3[2];
1270+
const float ksq = kx * kx + ky * ky + kz * kz;
1271+
if (ksq < ksq_max) {
1272+
cpu_kx.emplace_back(kx);
1273+
cpu_ky.emplace_back(ky);
1274+
cpu_kz.emplace_back(kz);
1275+
const float G = abs(two_pi_over_det) / ksq * exp(-ksq * charge_para.alpha_factor);
1276+
cpu_G.emplace_back(2.0f * G);
1279 1277
}
1280 1278
}
1281 1279
}

src/main_nep/nep_charge.cu

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,19 +1255,17 @@ static __global__ void find_k_and_G(
1255 1255
for (int n2 = - n2_max; n2 <= n2_max; ++n2) {
1256 1256
for (int n3 = - n3_max; n3 <= n3_max; ++n3) {
1257 1257
const int nsq = n1 * n1 + n2 * n2 + n3 * n3;
1258-
if (nsq > 0) {
1259-
const float kx = n1 * b1[0] + n2 * b2[0] + n3 * b3[0];
1260-
const float ky = n1 * b1[1] + n2 * b2[1] + n3 * b3[1];
1261-
const float kz = n1 * b1[2] + n2 * b2[2] + n3 * b3[2];
1262-
const float ksq = kx * kx + ky * ky + kz * kz;
1263-
if (ksq < ksq_max) {
1264-
const int nc_nk = nc * num_kpoints_max + (nk++);
1265-
g_kx[nc_nk] = kx;
1266-
g_ky[nc_nk] = ky;
1267-
g_kz[nc_nk] = kz;
1268-
const float symmetry_factor = (n1 > 0) ? 2.0f : 1.0f;
1269-
g_G[nc_nk] = symmetry_factor * abs(two_pi_over_det) / ksq * exp(-ksq * alpha_factor);
1270-
}
1258+
if (nsq == 0 || (n1 == 0 && n2 < 0) || (n1 == 0 && n2 == 0 && n3 < 0)) continue;
1259+
const float kx = n1 * b1[0] + n2 * b2[0] + n3 * b3[0];
1260+
const float ky = n1 * b1[1] + n2 * b2[1] + n3 * b3[1];
1261+
const float kz = n1 * b1[2] + n2 * b2[2] + n3 * b3[2];
1262+
const float ksq = kx * kx + ky * ky + kz * kz;
1263+
if (ksq < ksq_max) {
1264+
const int nc_nk = nc * num_kpoints_max + (nk++);
1265+
g_kx[nc_nk] = kx;
1266+
g_ky[nc_nk] = ky;
1267+
g_kz[nc_nk] = kz;
1268+
g_G[nc_nk] = 2.0f * abs(two_pi_over_det) / ksq * exp(-ksq * alpha_factor);
1271 1269
}
1272 1270
}
1273 1271
}

0 commit comments

Comments
 (0)