55 const double c = sqr(cos(
Utils::pi() * mesh_i * n));
61 return (1.0 + c * 2.0) / 3.0;
63 return (2.0 + c * (11.0 + c * 2.0)) / 15.0;
65 return (17.0 + c * (180.0 + c * (114.0 + c * 4.0))) / 315.0;
67 return (62.0 + c * (1072.0 + c * (1452.0 + c * (247.0 + c * 2.0)))) /
72 c * (83021.0 + c * (34096.0 + c * (2026.0 + c * 4.0))))) /
79 c * (349500.0 + c * (8166.0 + c * 4.0)))))) /
88 double alpha_L,
double *he_q) {
90 -mesh.x / 2 +
static_cast<int>(blockDim.x * blockIdx.x + threadIdx.x);
92 -mesh.y / 2 +
static_cast<int>(blockDim.y * blockIdx.y + threadIdx.y);
94 -mesh.z / 2 +
static_cast<int>(blockDim.z * blockIdx.z + threadIdx.z);
96 if ((nx >= mesh.x / 2) || (ny >= mesh.y / 2) || (nz >= mesh.z / 2))
99 const int lind = (nx + mesh.x / 2) * mesh.y * mesh.z +
100 (ny + mesh.y / 2) * mesh.z + (nz + mesh.z / 2);
102 if ((nx != 0) || (ny != 0) || (nz != 0)) {
103 const double alpha_L_i = 1. / alpha_L;
104 const double n2 = sqr(nx) + sqr(ny) + sqr(nz);
105 const double cs = p3m_analytic_cotangent_sum<cao>(nz, meshi.z) *
106 p3m_analytic_cotangent_sum<cao>(nx, meshi.x) *
107 p3m_analytic_cotangent_sum<cao>(ny, meshi.y);
108 const double ex = exp(-sqr(
Utils::pi() * alpha_L_i) * n2);
109 const double ex2 = sqr(ex);
113 auto const alias1 = ex2 / n2;
114 auto const d = alias1 - sqr(U2 * ex / cs) / n2;
124 int npart,
double sum_q2,
double alpha_L,
126 static thrust::device_vector<double> he_q;
127 he_q.resize(
static_cast<unsigned>(mesh[0] * mesh[1] * mesh[2]));
129 dim3 grid(std::max<unsigned>(1u,
static_cast<unsigned>(mesh[0] / 8 + 1)),
130 std::max<unsigned>(1u,
static_cast<unsigned>(mesh[1] / 8 + 1)),
131 std::max<unsigned>(1u,
static_cast<unsigned>(mesh[2] / 8 + 1)));
141 meshi.x = 1. / mesh[0];
142 meshi.y = 1. / mesh[1];
143 meshi.z = 1. / mesh[2];
147 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<1>, grid,
block, mesh3, meshi,
148 alpha_L, thrust::raw_pointer_cast(he_q.data()));
151 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<2>, grid,
block, mesh3, meshi,
152 alpha_L, thrust::raw_pointer_cast(he_q.data()));
155 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<3>, grid,
block, mesh3, meshi,
156 alpha_L, thrust::raw_pointer_cast(he_q.data()));
159 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<4>, grid,
block, mesh3, meshi,
160 alpha_L, thrust::raw_pointer_cast(he_q.data()));
163 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<5>, grid,
block, mesh3, meshi,
164 alpha_L, thrust::raw_pointer_cast(he_q.data()));
167 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<6>, grid,
block, mesh3, meshi,
168 alpha_L, thrust::raw_pointer_cast(he_q.data()));
171 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<7>, grid,
block, mesh3, meshi,
172 alpha_L, thrust::raw_pointer_cast(he_q.data()));
176 auto const he_q_final = thrust::reduce(he_q.begin(), he_q.end());
178 return 2 * prefactor * sum_q2 * sqrt(he_q_final / npart) / (box[1] * box[2]);
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)