@@ -73,8 +73,9 @@ void update_box(double* box, const double* d_v, int N)
7373 gpuMemcpy (processed_v, d_processed_v, 9 * sizeof (double ), gpuMemcpyDeviceToHost);
7474 transpose9 (processed_v);
7575
76+ double fac = std::max (N, std::min (250 , 6 * N));
7677 for (int i = 0 ; i < 9 ; i++) {
77- processed_v[i] = processed_v[i] / N ;
78+ processed_v[i] = processed_v[i] / fac ;
7879 }
7980
8081 double result[9 ];
@@ -137,8 +138,10 @@ void get_force_temp(
137138 get_force_temp_kernel<<<blocksPerGrid, threadsPerBlock>>> (
138139 force_per_atom, force_temp, d_deform, N);
139140
141+ double fac = std::max (N, std::min (250 , 6 * N));
142+
140143 for (int m = 0 ; m < 9 ; m++) {
141- virial_cpu_deform[m] = virial_cpu_deform[m] / N ;
144+ virial_cpu_deform[m] = virial_cpu_deform[m] / fac ;
142145 }
143146
144147 gpuMemcpy (force_temp + N, virial_cpu_deform, 3 * sizeof (double ), gpuMemcpyHostToDevice);
@@ -199,7 +202,7 @@ void solveLinearEquation(const double* A, const double* B, double* X)
199202 }
200203 }
201204
202- // 将计算得到的结果存储回 X 中(行主序)
205+ // row-major
203206 for (int i = 0 ; i < N; ++i) {
204207 for (int j = 0 ; j < N; ++j) {
205208 X[i * N + j] = b[i][j];
@@ -414,7 +417,7 @@ void Minimizer_FIRE_Box_Change::compute(
414417
415418 // minimize with changed box
416419 // create a velocity vector in GPU
417- GPU_Vector<double > v (size + 9 , 0 );
420+ GPU_Vector<double > v (size + 9 , 0.0 );
418421 GPU_Vector<double > temp1 (size + 9 );
419422 GPU_Vector<double > temp2 (size + 9 );
420423 GPU_Vector<double > force_temp (size + 9 );
@@ -428,8 +431,10 @@ void Minimizer_FIRE_Box_Change::compute(
428431 printf (" \n Energy minimization with changed box started.\n " );
429432
430433 for (int step = 0 ; step < number_of_steps_; ++step) {
434+
431435 force.compute (
432436 box, position_per_atom, type, group, potential_per_atom, force_per_atom, virial_per_atom);
437+
433438 // the virial tensor:
434439 // xx xy xz 0 3 4
435440 // yx yy yz 6 1 5
@@ -451,6 +456,7 @@ void Minimizer_FIRE_Box_Change::compute(
451456 // deform = np.linalg.solve(initial_box, current_box)
452457 solveLinearEquation<3 >(initial_box, box.cpu_h , deform);
453458 transpose9 (deform);
459+
454460 double virial_cpu[9 ];
455461 virialtot.copy_to_host (virial_cpu);
456462 transpose9 (virial_cpu);
@@ -481,6 +487,7 @@ void Minimizer_FIRE_Box_Change::compute(
481487 cpu_total_potential_[0 ],
482488 force_max,
483489 (virial_cpu[0 ] + virial_cpu[4 ] + virial_cpu[8 ]) / 3 . / box.get_volume () * 160.2176621 );
490+
484491 if (force_max < force_tolerance_)
485492 break ;
486493 }
@@ -490,15 +497,17 @@ void Minimizer_FIRE_Box_Change::compute(
490497 if (P > 0 ) {
491498 if (N_neg > N_min) {
492499 next_dt = dt * f_inc;
493- if (next_dt < dt_max)
494- dt = next_dt;
500+ if (next_dt > dt_max)
501+ next_dt = dt_max;
502+ dt = next_dt;
495503 alpha *= f_alpha;
496504 }
497505 N_neg++;
498506 } else {
499507 next_dt = dt * f_dec;
500- if (next_dt > dt_min)
501- dt = next_dt;
508+ if (next_dt < dt_min)
509+ next_dt = dt_min;
510+ dt = next_dt;
502511 alpha = alpha_start;
503512 // move position back
504513 scalar_multiply (-0.5 * dt, v, temp1);
@@ -512,6 +521,7 @@ void Minimizer_FIRE_Box_Change::compute(
512521 // implicit Euler integration
513522 double F_modulus = sqrt (dot (force_temp, force_temp));
514523 double v_modulus = sqrt (dot (v, v));
524+
515525 // dv = F/m*dt
516526 scalar_multiply (dt / m, force_temp, temp2);
517527 vector_sum (v, temp2, v);
0 commit comments