@@ -26,6 +26,8 @@ The class dealing with the Lennard-Jones (LJ) pairwise potentials.
2626// best block size here: 128
2727#define BLOCK_SIZE_FORCE 128
2828
29+ #define USE_FIXED_NEIGHBOR
30+
2931LJ::LJ (FILE* fid, int num_types, int num_atoms)
3032{
3133 printf (" Use %d-element LJ potential with elements:\n " , num_types);
@@ -69,11 +71,11 @@ LJ::~LJ(void)
6971
// Evaluate the Lennard-Jones pair term for one pair (i, j) in single precision.
//
// Inputs:
//   s6e4   coefficient of the attractive r^-6 term
//          (presumably 4 * epsilon * sigma^6 -- confirm against the host setup code)
//   s12e4  coefficient of the repulsive r^-12 term
//          (presumably 4 * epsilon * sigma^12 -- confirm against the host setup code)
//   d12sq  squared pair distance r_ij^2; must be > 0, there is no guard against
//          a zero distance (the division below would produce inf)
//
// Outputs (written through the references):
//   p2  pair potential      U_ij = s12e4 / r^12 - s6e4 / r^6
//   f2  (d U_ij / d r_ij) / r_ij = 6 * (s6e4 / r^6 - 2 * s12e4 / r^12) / r^2
//
// Only even powers of r are needed, so no square root is taken.
// Marked __host__ as well as __device__ so the arithmetic can be unit-tested on
// the CPU; device callers are unaffected.
static __host__ __device__ void
find_p2_and_f2(float s6e4, float s12e4, float d12sq, float& p2, float& f2)
{
    // All literals carry the 'f' suffix so nothing is promoted to double.
    float d12inv2 = 1.0f / d12sq;                    // 1 / r^2
    float d12inv6 = d12inv2 * d12inv2 * d12inv2;     // 1 / r^6
    f2 = 6.0f * (s6e4 * d12inv6 - s12e4 * 2.0f * d12inv6 * d12inv6) * d12inv2;
    p2 = s12e4 * d12inv6 * d12inv6 - s6e4 * d12inv6;
}
7981
@@ -97,19 +99,19 @@ static __global__ void gpu_find_force(
9799 double * g_potential)
98100{
99101 int n1 = blockIdx .x * blockDim .x + threadIdx .x + N1; // particle index
100- double s_fx = 0.0 ; // force_x
101- double s_fy = 0.0 ; // force_y
102- double s_fz = 0.0 ; // force_z
103- double s_pe = 0.0 ; // potential energy
104- double s_sxx = 0.0 ; // virial_stress_xx
105- double s_sxy = 0.0 ; // virial_stress_xy
106- double s_sxz = 0.0 ; // virial_stress_xz
107- double s_syx = 0.0 ; // virial_stress_yx
108- double s_syy = 0.0 ; // virial_stress_yy
109- double s_syz = 0.0 ; // virial_stress_yz
110- double s_szx = 0.0 ; // virial_stress_zx
111- double s_szy = 0.0 ; // virial_stress_zy
112- double s_szz = 0.0 ; // virial_stress_zz
102+ float s_fx = 0 .0f ; // force_x
103+ float s_fy = 0 .0f ; // force_y
104+ float s_fz = 0 .0f ; // force_z
105+ float s_pe = 0 .0f ; // potential energy
106+ float s_sxx = 0 .0f ; // virial_stress_xx
107+ float s_sxy = 0 .0f ; // virial_stress_xy
108+ float s_sxz = 0 .0f ; // virial_stress_xz
109+ float s_syx = 0 .0f ; // virial_stress_yx
110+ float s_syy = 0 .0f ; // virial_stress_yy
111+ float s_syz = 0 .0f ; // virial_stress_yz
112+ float s_szx = 0 .0f ; // virial_stress_zx
113+ float s_szy = 0 .0f ; // virial_stress_zy
114+ float s_szz = 0 .0f ; // virial_stress_zz
113115
114116 if (n1 < N2) {
115117 int neighbor_number = g_neighbor_number[n1];
@@ -122,33 +124,33 @@ static __global__ void gpu_find_force(
122124 int n2 = g_neighbor_list[n1 + number_of_particles * i1];
123125 int type2 = g_type[n2];
124126
125- double x12 = g_x[n2] - x1;
126- double y12 = g_y[n2] - y1;
127- double z12 = g_z[n2] - z1;
127+ float x12 = g_x[n2] - x1;
128+ float y12 = g_y[n2] - y1;
129+ float z12 = g_z[n2] - z1;
128130 apply_mic (box, x12, y12, z12);
129- double d12sq = x12 * x12 + y12 * y12 + z12 * z12;
131+ float d12sq = x12 * x12 + y12 * y12 + z12 * z12;
130132
131- double p2, f2;
133+ float p2, f2;
132134 if (d12sq >= lj.cutoff_square [type1][type2]) {
133135 continue ;
134136 }
135137 find_p2_and_f2 (lj.s6e4 [type1][type2], lj.s12e4 [type1][type2], d12sq, p2, f2);
136138
137139 // treat two-body potential in the same way as many-body potential
138- double f12x = f2 * x12 * 0.5 ;
139- double f12y = f2 * y12 * 0.5 ;
140- double f12z = f2 * z12 * 0.5 ;
141- double f21x = -f12x;
142- double f21y = -f12y;
143- double f21z = -f12z;
140+ float f12x = f2 * x12 * 0 .5f ;
141+ float f12y = f2 * y12 * 0 .5f ;
142+ float f12z = f2 * z12 * 0 .5f ;
143+ float f21x = -f12x;
144+ float f21y = -f12y;
145+ float f21z = -f12z;
144146
145147 // accumulate force
146148 s_fx += f12x - f21x;
147149 s_fy += f12y - f21y;
148150 s_fz += f12z - f21z;
149151
150152 // accumulate potential energy and virial
151- s_pe += p2 * 0.5 ; // two-body potential
153+ s_pe += p2 * 0 .5f ; // two-body potential
152154 s_sxx += x12 * f21x;
153155 s_sxy += x12 * f21y;
154156 s_sxz += x12 * f21z;
0 commit comments