|
| 1 | +/* |
| 2 | + Copyright 2017 Zheyong Fan and GPUMD development team |
| 3 | + This file is part of GPUMD. |
| 4 | + GPUMD is free software: you can redistribute it and/or modify |
| 5 | + it under the terms of the GNU General Public License as published by |
| 6 | + the Free Software Foundation, either version 3 of the License, or |
| 7 | + (at your option) any later version. |
| 8 | + GPUMD is distributed in the hope that it will be useful, |
| 9 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | + GNU General Public License for more details. |
| 12 | + You should have received a copy of the GNU General Public License |
| 13 | + along with GPUMD. If not, see <http://www.gnu.org/licenses/>. |
| 14 | +*/ |
| 15 | + |
| 16 | +/*----------------------------------------------------------------------------80 |
| 17 | +The QTB thermostat based on a colored noise filter: |
| 18 | +[1] Dammak, T., et al. Phys. Rev. Lett. 103, 190601 (2009). |
| 19 | +------------------------------------------------------------------------------*/ |
| 20 | + |
| 21 | +#include "ensemble_qtb.cuh" |
| 22 | +#include "langevin_utilities.cuh" |
| 23 | +#include "utilities/common.cuh" |
| 24 | +#include "utilities/gpu_macro.cuh" |
| 25 | +#include <cmath> |
| 26 | +#include <cstdlib> |
| 27 | + |
| 28 | +namespace |
| 29 | +{ |
| 30 | +static __global__ void gpu_initialize_qtb_history( |
| 31 | + gpurandState* states, |
| 32 | + const int N, |
| 33 | + const int nfreq2, |
| 34 | + double* random_array_0, |
| 35 | + double* random_array_1, |
| 36 | + double* random_array_2) |
| 37 | +{ |
| 38 | + const int n = blockIdx.x * blockDim.x + threadIdx.x; |
| 39 | + if (n < N) { |
| 40 | + gpurandState state = states[n]; |
| 41 | + const int offset = n * nfreq2; |
| 42 | + for (int m = 0; m < nfreq2; ++m) { |
| 43 | + random_array_0[offset + m] = gpurand_normal_double(&state) / sqrt(12.0); |
| 44 | + random_array_1[offset + m] = gpurand_normal_double(&state) / sqrt(12.0); |
| 45 | + random_array_2[offset + m] = gpurand_normal_double(&state) / sqrt(12.0); |
| 46 | + } |
| 47 | + states[n] = state; |
| 48 | + } |
| 49 | +} |
| 50 | + |
| 51 | +static __global__ void gpu_refresh_qtb_random_force( |
| 52 | + gpurandState* states, |
| 53 | + const int N, |
| 54 | + const int nfreq2, |
| 55 | + const double* time_H, |
| 56 | + const double gamma3_prefactor, |
| 57 | + const double* mass, |
| 58 | + double* random_array_0, |
| 59 | + double* random_array_1, |
| 60 | + double* random_array_2, |
| 61 | + double* fran_x, |
| 62 | + double* fran_y, |
| 63 | + double* fran_z) |
| 64 | +{ |
| 65 | + const int n = blockIdx.x * blockDim.x + threadIdx.x; |
| 66 | + if (n < N) { |
| 67 | + gpurandState state = states[n]; |
| 68 | + const int offset = n * nfreq2; |
| 69 | + |
| 70 | + for (int m = 0; m < nfreq2 - 1; ++m) { |
| 71 | + random_array_0[offset + m] = random_array_0[offset + m + 1]; |
| 72 | + random_array_1[offset + m] = random_array_1[offset + m + 1]; |
| 73 | + random_array_2[offset + m] = random_array_2[offset + m + 1]; |
| 74 | + } |
| 75 | + random_array_0[offset + nfreq2 - 1] = gpurand_normal_double(&state) / sqrt(12.0); |
| 76 | + random_array_1[offset + nfreq2 - 1] = gpurand_normal_double(&state) / sqrt(12.0); |
| 77 | + random_array_2[offset + nfreq2 - 1] = gpurand_normal_double(&state) / sqrt(12.0); |
| 78 | + |
| 79 | + double sum_x = 0.0; |
| 80 | + double sum_y = 0.0; |
| 81 | + double sum_z = 0.0; |
| 82 | + for (int m = 0; m < nfreq2; ++m) { |
| 83 | + const int reverse_index = offset + nfreq2 - m - 1; |
| 84 | + const double h = time_H[m]; |
| 85 | + sum_x += h * random_array_0[reverse_index]; |
| 86 | + sum_y += h * random_array_1[reverse_index]; |
| 87 | + sum_z += h * random_array_2[reverse_index]; |
| 88 | + } |
| 89 | + |
| 90 | + const double gamma3 = gamma3_prefactor * sqrt(mass[n]); |
| 91 | + fran_x[n] = sum_x * gamma3; |
| 92 | + fran_y[n] = sum_y * gamma3; |
| 93 | + fran_z[n] = sum_z * gamma3; |
| 94 | + |
| 95 | + states[n] = state; |
| 96 | + } |
| 97 | +} |
| 98 | + |
| 99 | +static __global__ void gpu_apply_qtb_half_step( |
| 100 | + const int N, |
| 101 | + const double dt_half, |
| 102 | + const double fric_coef, |
| 103 | + const double* mass, |
| 104 | + const double* fran_x, |
| 105 | + const double* fran_y, |
| 106 | + const double* fran_z, |
| 107 | + double* vx, |
| 108 | + double* vy, |
| 109 | + double* vz) |
| 110 | +{ |
| 111 | + const int n = blockIdx.x * blockDim.x + threadIdx.x; |
| 112 | + if (n < N) { |
| 113 | + const double mass_inv = 1.0 / mass[n]; |
| 114 | + vx[n] += dt_half * (fran_x[n] * mass_inv - fric_coef * vx[n]); |
| 115 | + vy[n] += dt_half * (fran_y[n] * mass_inv - fric_coef * vy[n]); |
| 116 | + vz[n] += dt_half * (fran_z[n] * mass_inv - fric_coef * vz[n]); |
| 117 | + } |
| 118 | +} |
| 119 | +} // namespace |
| 120 | + |
| 121 | +Ensemble_QTB::Ensemble_QTB( |
| 122 | + int t, |
| 123 | + int N, |
| 124 | + double T, |
| 125 | + double Tc, |
| 126 | + double dt_input, |
| 127 | + double f_max_input, |
| 128 | + int N_f_input, |
| 129 | + int seed_input) |
| 130 | +{ |
| 131 | + type = t; |
| 132 | + number_of_atoms = N; |
| 133 | + temperature = T; |
| 134 | + temperature_coupling = Tc; |
| 135 | + dt = dt_input; |
| 136 | + seed = seed_input; |
| 137 | + N_f = N_f_input; |
| 138 | + nfreq2 = 2 * N_f; |
| 139 | + |
| 140 | + // Input f_max uses ps^-1. Convert to internal time unit. |
| 141 | + f_max_natural = f_max_input * TIME_UNIT_CONVERSION / 1000.0; |
| 142 | + |
| 143 | + int alpha_estimate = int(1.0 / (2.0 * f_max_natural * dt)); |
| 144 | + if (alpha_estimate < 1) { |
| 145 | + alpha = 1; |
| 146 | + } else { |
| 147 | + alpha = alpha_estimate; |
| 148 | + } |
| 149 | + |
| 150 | + h_timestep = alpha * dt; |
| 151 | + fric_coef = 1.0 / (temperature_coupling * dt); |
| 152 | + counter_mu = 0; |
| 153 | + last_filter_temperature = -1.0; |
| 154 | + |
| 155 | + time_H_host.resize(nfreq2, 0.0); |
| 156 | + time_H_device.resize(nfreq2); |
| 157 | + |
| 158 | + random_array_0.resize(size_t(number_of_atoms) * size_t(nfreq2)); |
| 159 | + random_array_1.resize(size_t(number_of_atoms) * size_t(nfreq2)); |
| 160 | + random_array_2.resize(size_t(number_of_atoms) * size_t(nfreq2)); |
| 161 | + fran.resize(size_t(number_of_atoms) * 3); |
| 162 | + |
| 163 | + curand_states.resize(number_of_atoms); |
| 164 | + initialize_curand_states<<<(number_of_atoms - 1) / 128 + 1, 128>>>( |
| 165 | + curand_states.data(), number_of_atoms, seed); |
| 166 | + GPU_CHECK_KERNEL |
| 167 | + |
| 168 | + gpu_initialize_qtb_history<<<(number_of_atoms - 1) / 128 + 1, 128>>>( |
| 169 | + curand_states.data(), |
| 170 | + number_of_atoms, |
| 171 | + nfreq2, |
| 172 | + random_array_0.data(), |
| 173 | + random_array_1.data(), |
| 174 | + random_array_2.data()); |
| 175 | + GPU_CHECK_KERNEL |
| 176 | +} |
| 177 | + |
| 178 | +Ensemble_QTB::~Ensemble_QTB(void) |
| 179 | +{ |
| 180 | + // nothing |
| 181 | +} |
| 182 | + |
| 183 | +void Ensemble_QTB::update_time_filter(const double target_temperature) |
| 184 | +{ |
| 185 | + if (fabs(target_temperature - last_filter_temperature) < 1.0e-12) { |
| 186 | + return; |
| 187 | + } |
| 188 | + |
| 189 | + std::vector<double> omega_H(nfreq2, 0.0); |
| 190 | + |
| 191 | + for (int k = 0; k < nfreq2; ++k) { |
| 192 | + const double k_shift = k - N_f; |
| 193 | + const double f_k = k_shift / (nfreq2 * h_timestep); |
| 194 | + |
| 195 | + if (k == N_f) { |
| 196 | + omega_H[k] = sqrt(K_B * target_temperature); |
| 197 | + continue; |
| 198 | + } |
| 199 | + |
| 200 | + const double energy_k = 2.0 * PI * HBAR * fabs(f_k); |
| 201 | + const double x = energy_k / (K_B * target_temperature); |
| 202 | + double qfactor = 0.5; |
| 203 | + if (x < 200.0) { |
| 204 | + qfactor += 1.0 / (exp(x) - 1.0); |
| 205 | + } |
| 206 | + |
| 207 | + omega_H[k] = sqrt(energy_k * qfactor); |
| 208 | + const double numerator = sin(k_shift * PI / (2.0 * alpha * N_f)); |
| 209 | + const double denominator = sin(k_shift * PI / (2.0 * N_f)); |
| 210 | + omega_H[k] *= alpha * numerator / denominator; |
| 211 | + } |
| 212 | + |
| 213 | + for (int n = 0; n < nfreq2; ++n) { |
| 214 | + double value = 0.0; |
| 215 | + const double t_n = n - N_f; |
| 216 | + for (int k = 0; k < nfreq2; ++k) { |
| 217 | + const double omega_k = (k - N_f) * PI / N_f; |
| 218 | + value += omega_H[k] * cos(omega_k * t_n); |
| 219 | + } |
| 220 | + time_H_host[n] = value / nfreq2; |
| 221 | + } |
| 222 | + |
| 223 | + time_H_device.copy_from_host(time_H_host.data()); |
| 224 | + last_filter_temperature = target_temperature; |
| 225 | +} |
| 226 | + |
| 227 | +void Ensemble_QTB::refresh_colored_random_force() |
| 228 | +{ |
| 229 | + const double gamma3_prefactor = sqrt(2.0 * fric_coef * 12.0 / h_timestep); |
| 230 | + gpu_refresh_qtb_random_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>( |
| 231 | + curand_states.data(), |
| 232 | + number_of_atoms, |
| 233 | + nfreq2, |
| 234 | + time_H_device.data(), |
| 235 | + gamma3_prefactor, |
| 236 | + atom->mass.data(), |
| 237 | + random_array_0.data(), |
| 238 | + random_array_1.data(), |
| 239 | + random_array_2.data(), |
| 240 | + fran.data(), |
| 241 | + fran.data() + number_of_atoms, |
| 242 | + fran.data() + number_of_atoms * 2); |
| 243 | + GPU_CHECK_KERNEL |
| 244 | +} |
| 245 | + |
| 246 | +void Ensemble_QTB::apply_qtb_half_step() |
| 247 | +{ |
| 248 | + const double dt_half = 0.5 * dt; |
| 249 | + |
| 250 | + gpu_apply_qtb_half_step<<<(number_of_atoms - 1) / 128 + 1, 128>>>( |
| 251 | + number_of_atoms, |
| 252 | + dt_half, |
| 253 | + fric_coef, |
| 254 | + atom->mass.data(), |
| 255 | + fran.data(), |
| 256 | + fran.data() + number_of_atoms, |
| 257 | + fran.data() + 2 * number_of_atoms, |
| 258 | + atom->velocity_per_atom.data(), |
| 259 | + atom->velocity_per_atom.data() + number_of_atoms, |
| 260 | + atom->velocity_per_atom.data() + 2 * number_of_atoms); |
| 261 | + GPU_CHECK_KERNEL |
| 262 | + |
| 263 | + gpu_find_momentum<<<4, 1024>>>( |
| 264 | + number_of_atoms, |
| 265 | + atom->mass.data(), |
| 266 | + atom->velocity_per_atom.data(), |
| 267 | + atom->velocity_per_atom.data() + number_of_atoms, |
| 268 | + atom->velocity_per_atom.data() + 2 * number_of_atoms); |
| 269 | + GPU_CHECK_KERNEL |
| 270 | + |
| 271 | + gpu_correct_momentum<<<(number_of_atoms - 1) / 128 + 1, 128>>>( |
| 272 | + number_of_atoms, |
| 273 | + atom->velocity_per_atom.data(), |
| 274 | + atom->velocity_per_atom.data() + number_of_atoms, |
| 275 | + atom->velocity_per_atom.data() + 2 * number_of_atoms); |
| 276 | + GPU_CHECK_KERNEL |
| 277 | +} |
| 278 | + |
| 279 | +void Ensemble_QTB::compute1( |
| 280 | + const double time_step, |
| 281 | + const std::vector<Group>& group, |
| 282 | + Box& box, |
| 283 | + Atom& atom, |
| 284 | + GPU_Vector<double>& thermo) |
| 285 | +{ |
| 286 | + if (counter_mu == 0) { |
| 287 | + update_time_filter(temperature); |
| 288 | + refresh_colored_random_force(); |
| 289 | + } |
| 290 | + |
| 291 | + apply_qtb_half_step(); |
| 292 | + |
| 293 | +#ifdef USE_NEPCG |
| 294 | + velocity_verlet_cg( |
| 295 | + true, |
| 296 | + time_step, |
| 297 | + group, |
| 298 | + atom.mass, |
| 299 | + atom.force_per_atom, |
| 300 | + atom.position_per_atom, |
| 301 | + atom.velocity_per_atom); |
| 302 | +#else |
| 303 | + velocity_verlet( |
| 304 | + true, |
| 305 | + time_step, |
| 306 | + group, |
| 307 | + atom.mass, |
| 308 | + atom.force_per_atom, |
| 309 | + atom.position_per_atom, |
| 310 | + atom.velocity_per_atom); |
| 311 | +#endif |
| 312 | +} |
| 313 | + |
| 314 | +void Ensemble_QTB::compute2( |
| 315 | + const double time_step, |
| 316 | + const std::vector<Group>& group, |
| 317 | + Box& box, |
| 318 | + Atom& atom, |
| 319 | + GPU_Vector<double>& thermo) |
| 320 | +{ |
| 321 | +#ifdef USE_NEPCG |
| 322 | + velocity_verlet_cg( |
| 323 | + false, |
| 324 | + time_step, |
| 325 | + group, |
| 326 | + atom.mass, |
| 327 | + atom.force_per_atom, |
| 328 | + atom.position_per_atom, |
| 329 | + atom.velocity_per_atom); |
| 330 | +#else |
| 331 | + velocity_verlet( |
| 332 | + false, |
| 333 | + time_step, |
| 334 | + group, |
| 335 | + atom.mass, |
| 336 | + atom.force_per_atom, |
| 337 | + atom.position_per_atom, |
| 338 | + atom.velocity_per_atom); |
| 339 | +#endif |
| 340 | + |
| 341 | + apply_qtb_half_step(); |
| 342 | + |
| 343 | + find_thermo( |
| 344 | + true, |
| 345 | + box.get_volume(), |
| 346 | + group, |
| 347 | + atom.mass, |
| 348 | + atom.potential_per_atom, |
| 349 | + atom.velocity_per_atom, |
| 350 | + atom.virial_per_atom, |
| 351 | + thermo); |
| 352 | + |
| 353 | + counter_mu = (counter_mu + 1) % alpha; |
| 354 | +} |
0 commit comments