![]() |
ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
|
00001 /* 00002 Copyright (C) 2010,2011,2012,2013 The ESPResSo project 00003 Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 00004 Max-Planck-Institute for Polymer Research, Theory Group 00005 00006 This file is part of ESPResSo. 00007 00008 ESPResSo is free software: you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation, either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 ESPResSo is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with this program. If not, see <http://www.gnu.org/licenses/>. 00020 */ 00021 #include <mpi.h> 00022 #include <stdio.h> 00023 #include <stdlib.h> 00024 #include <string.h> 00025 00026 #include "utils.h" 00027 #include "integrate.h" 00028 #include "initialize.h" 00029 #include "global.h" 00030 #include "grid.h" 00031 #include "domain_decomposition.h" 00032 #include "particle_data.h" 00033 #include "communication.h" 00034 #include "fft.h" 00035 #include "p3m.h" 00036 #include "thermostat.h" 00037 #include "cells.h" 00038 #include "tuning.h" 00039 #include "elc.h" 00040 00041 #ifdef P3M 00042 00043 /************************************************ 00044 * variables 00045 ************************************************/ 00046 p3m_data_struct p3m; 00047 00048 /* MPI tags for the charge-charge p3m communications: */ 00049 /** Tag for communication in P3M_init() -> send_calc_mesh(). */ 00050 #define REQ_P3M_INIT 200 00051 /** Tag for communication in p3m_gather_fft_grid(). */ 00052 #define REQ_P3M_GATHER 201 00053 /** Tag for communication in p3m_spread_force_grid(). */ 00054 #define REQ_P3M_SPREAD 202 00055 00056 /* Index helpers for direct and reciprocal space 00057 * After the FFT the data is in order YZX, which 00058 * means that Y is the slowest changing index. 00059 * The defines are here to not get confused and 00060 * be able to easily change the order. 00061 */ 00062 #define RX 0 00063 #define RY 1 00064 #define RZ 2 00065 #define KY 0 00066 #define KZ 1 00067 #define KX 2 00068 00069 /** \name Private Functions */ 00070 /************************************************************/ 00071 /*@{*/ 00072 00073 #ifdef P3M_DEBUG 00074 static void p3m_print(void) { 00075 fprintf(stderr, "general information: \n\t node: %d \n\t box_l: (%lf, %lf, %lf)\n", this_node, box_l[0], box_l[1], box_l[2]); 00076 00077 fprintf(stderr, "p3m parameters:\n\t alpha_L: %lf\n\t r_cut_iL: %lf\n\t \ 00078 mesh: (%d, %d, %d)\n\t mesh_off: (%lf, %lf, %lf)\n\t \ 00079 cao: %d\n\t inter: %d\n\t accuracy: %lf\n\t epsilon: %lf\n\t \ 00080 cao_cut: (%lf, %lf, %lf)\n\t a: (%lf,%lf,%lf)\n\t \ 00081 ai: (%lf,%lf,%lf)\n\t alpha: %lf\n\t r_cut: %lf\n\t \ 00082 inter2: %d\n\t cao3: %d\n\t additional_mesh: (%lf,%lf,%lf)\n", \ 00083 p3m.params.alpha_L,p3m.params.r_cut_iL, p3m.params.mesh[0], p3m.params.mesh[1], p3m.params.mesh[2], p3m.params.mesh_off[0], p3m.params.mesh_off[1], p3m.params.mesh_off[2], \ 00084 p3m.params.cao, p3m.params.inter, p3m.params.accuracy, p3m.params.epsilon, p3m.params.cao_cut[0], p3m.params.cao_cut[1], p3m.params.cao_cut[2], p3m.params.a[0], p3m.params.a[1], p3m.params.a[2], p3m.params.ai[0], p3m.params.ai[1], p3m.params.ai[2], \ 00085 p3m.params.alpha, p3m.params.r_cut, p3m.params.inter2, p3m.params.cao3, p3m.params.additional_mesh[0], p3m.params.additional_mesh[1], p3m.params.additional_mesh[2]); 00086 } 00087 00088 #endif 00089 00090 /** Calculates for charges the properties of the send/recv sub-meshes of the local FFT mesh. 00091 * In order to calculate the recv sub-meshes there is a communication of 00092 * the margins between neighbouring nodes. */ 00093 static void p3m_calc_send_mesh(); 00094 00095 00096 /** Initializes the (inverse) mesh constant \ref p3m_parameter_struct::a (\ref 00097 p3m_parameter_struct::ai) and the cutoff for charge assignment \ref 00098 p3m_parameter_struct::cao_cut, which has to be done by \ref p3m_init 00099 once and by \ref p3m_scaleby_box_l whenever the \ref box_l 00100 changed. */ 00101 static void p3m_init_a_ai_cao_cut(void); 00102 00103 00104 /** Calculate the spacial position of the left down mesh point of the local mesh, to be 00105 stored in \ref p3m_local_mesh::ld_pos; function called by \ref p3m_calc_local_ca_mesh once 00106 and by \ref p3m_scaleby_box_l whenever the \ref box_l changed. */ 00107 static void p3m_calc_lm_ld_pos(void); 00108 00109 00110 /** Calculates the dipole term */ 00111 static double p3m_calc_dipole_term(int force_flag, int energy_flag); 00112 00113 /** Gather FFT grid. 00114 * After the charge assignment Each node needs to gather the 00115 * information for the FFT grid in his spatial domain. 00116 */ 00117 static void p3m_gather_fft_grid(double* mesh); 00118 00119 /** Spread force grid. 00120 * After the k-space calculations each node needs to get all force 00121 * information to reassigne the forces from the grid to the 00122 * particles. 00123 */ 00124 static void p3m_spread_force_grid(double* mesh); 00125 00126 #ifdef P3M_STORE_CA_FRAC 00127 /** realloc charge assignment fields. */ 00128 static void p3m_realloc_ca_fields(int newsize); 00129 #endif 00130 00131 /** checks for correctness for charges in P3M of the cao_cut, necessary when the box length changes */ 00132 static int p3m_sanity_checks_boxl(void); 00133 00134 /** Calculate the spacial position of the left down mesh point of the local mesh, to be 00135 stored in \ref p3m_local_mesh::ld_pos; function called by \ref p3m_calc_local_ca_mesh once 00136 and by \ref p3m_scaleby_box_l whenever the \ref box_l changed. */ 00137 static void p3m_calc_lm_ld_pos(void); 00138 00139 /** Calculates properties of the local FFT mesh for the 00140 charge assignment process. */ 00141 static void p3m_calc_local_ca_mesh(void); 00142 00143 00144 /** Interpolates the P-th order charge assignment function from 00145 * Hockney/Eastwood 5-189 (or 8-61). The following charge fractions 00146 * are also tabulated in Deserno/Holm. */ 00147 static void p3m_interpolate_charge_assignment_function(void); 00148 00149 /** shifts the mesh points by mesh/2 */ 00150 static void p3m_calc_meshift(void); 00151 00152 /** Calculates the Fourier transformed differential operator. 00153 * Remark: This is done on the level of n-vectors and not k-vectors, 00154 * i.e. the prefactor i*2*PI/L is missing! */ 00155 static void p3m_calc_differential_operator(void); 00156 00157 /** Calculates the optimal influence function of Hockney and Eastwood. 00158 * (optimised for force calculations) 00159 * 00160 * Each node calculates only the values for its domain in k-space 00161 * (see fft.plan[3].mesh and fft.plan[3].start). 00162 * 00163 * See also: Hockney/Eastwood 8-22 (p275). Note the somewhat 00164 * different convention for the prefactors, which is described in 00165 * Deserno/Holm. */ 00166 static void p3m_calc_influence_function_force(void); 00167 00168 /** Calculates the influence function optimized for the energy and the 00169 self energy correction. */ 00170 static void p3m_calc_influence_function_energy(void); 00171 00172 00173 /** Calculates the aliasing sums for the optimal influence function. 00174 * 00175 * Calculates the aliasing sums in the nominator and denominator of 00176 * the expression for the optimal influence function (see 00177 * Hockney/Eastwood: 8-22, p. 275). 00178 * 00179 * \param n n-vector for which the aliasing sum is to be performed. 00180 * \param nominator aliasing sums in the nominator. 00181 * \return denominator aliasing sum in the denominator 00182 */ 00183 double p3m_perform_aliasing_sums_force(int n[3], double nominator[3]); 00184 double p3m_perform_aliasing_sums_energy(int n[3]); 00185 00186 /*@}*/ 00187 00188 00189 /** \name P3M Tuning Functions (private)*/ 00190 /************************************************************/ 00191 /*@{*/ 00192 00193 /** Calculates the real space contribution to the rms error in the force (as described 00194 by Kolafa and Perram). 00195 \param prefac Prefactor of coulomb interaction. 00196 \param r_cut_iL rescaled real space cutoff for p3m method. 00197 \param n_c_part number of charged particles in the system. 00198 \param sum_q2 sum of square of charges in the system 00199 \param alpha_L rescaled ewald splitting parameter. 00200 \return real space error 00201 */ 00202 static double p3m_real_space_error(double prefac, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L); 00203 00204 /** Calculate the analytic expression of the error estimate for the 00205 P3M method in the book of Hockney and Eastwood (Eqn. 8.23) in 00206 order to obtain the rms error in the force for a system of N 00207 randomly distributed particles in a cubic box (k space part). 00208 \param prefac Prefactor of coulomb interaction. 00209 \param mesh number of mesh points in one direction. 00210 \param cao charge assignment order. 00211 \param n_c_part number of charged particles in the system. 00212 \param sum_q2 sum of square of charges in the system 00213 \param alpha_L rescaled ewald splitting parameter. 00214 \return reciprocal (k) space error 00215 */ 00216 static double p3m_k_space_error(double prefac, int mesh[3], int cao, int n_c_part, double sum_q2, double alpha_L); 00217 00218 00219 00220 /** aliasing sum used by \ref p3m_k_space_error. */ 00221 static void p3m_tune_aliasing_sums(int nx, int ny, int nz, 00222 int mesh[3], double mesh_i[3], int cao, double alpha_L_i, 00223 double *alias1, double *alias2); 00224 00225 /*@}*/ 00226 00227 00228 void p3m_pre_init(void) { 00229 p3m_common_parameter_pre_init(&p3m.params); 00230 /* p3m.local_mesh is uninitialized */ 00231 /* p3m.sm is uninitialized */ 00232 00233 p3m.rs_mesh = NULL; 00234 p3m.ks_mesh = NULL; 00235 p3m.sum_qpart = 0; 00236 p3m.sum_q2 = 0.0; 00237 p3m.square_sum_q = 0.0; 00238 00239 for (int i = 0; i < 7; i++) 00240 p3m.int_caf[i] = NULL; 00241 p3m.pos_shift = 0.0; 00242 p3m.meshift_x = NULL; 00243 p3m.meshift_y = NULL; 00244 p3m.meshift_z = NULL; 00245 00246 p3m.d_op[0] = NULL; 00247 p3m.d_op[1] = NULL; 00248 p3m.d_op[2] = NULL; 00249 p3m.g_force = NULL; 00250 p3m.g_energy = NULL; 00251 00252 #ifdef P3M_STORE_CA_FRAC 00253 p3m.ca_num = 0; 00254 p3m.ca_frac = NULL; 00255 p3m.ca_fmp = NULL; 00256 #endif 00257 p3m.ks_pnum = 0; 00258 00259 p3m.send_grid = NULL; 00260 p3m.recv_grid = NULL; 00261 00262 fft_pre_init(); 00263 } 00264 00265 void p3m_free() 00266 { 00267 int i; 00268 /* free memory */ 00269 #ifdef P3M_STORE_CA_FRAC 00270 free(p3m.ca_frac); 00271 free(p3m.ca_fmp); 00272 #endif 00273 free(p3m.send_grid); 00274 free(p3m.recv_grid); 00275 free(p3m.rs_mesh); 00276 free(p3m.ks_mesh); 00277 for(i=0; i<p3m.params.cao; i++) free(p3m.int_caf[i]); 00278 } 00279 00280 void p3m_set_bjerrum() { 00281 p3m.params.alpha = 0.0; 00282 p3m.params.alpha_L = 0.0; 00283 p3m.params.r_cut = 0.0; 00284 p3m.params.r_cut_iL = 0.0; 00285 p3m.params.mesh[0] = 0; 00286 p3m.params.mesh[1] = 0; 00287 p3m.params.mesh[2] = 0; 00288 p3m.params.cao = 0; 00289 } 00290 00291 void p3m_init() { 00292 if(coulomb.bjerrum == 0.0) { 00293 p3m.params.r_cut = 0.0; 00294 p3m.params.r_cut_iL = 0.0; 00295 00296 00297 00298 if(this_node==0) 00299 P3M_TRACE(fprintf(stderr,"0: P3M_init: Bjerrum length is zero.\n"); 00300 00301 fprintf(stderr," Electrostatics switched off!\n")); 00302 } else { 00303 P3M_TRACE(fprintf(stderr,"%d: p3m_init: \n",this_node)); 00304 00305 if (p3m_sanity_checks()) return; 00306 00307 P3M_TRACE(fprintf(stderr,"%d: p3m_init: starting\n",this_node)); 00308 00309 P3M_TRACE(fprintf(stderr,"%d: mesh=%d, cao=%d, mesh_off=(%f,%f,%f)\n",this_node,p3m.params.mesh[0],p3m.params.cao,p3m.params.mesh_off[0],p3m.params.mesh_off[1],p3m.params.mesh_off[2])); 00310 p3m.params.cao3 = p3m.params.cao*p3m.params.cao*p3m.params.cao; 00311 00312 /* initializes the (inverse) mesh constant p3m.params.a (p3m.params.ai) and the cutoff for charge assignment p3m.params.cao_cut */ 00313 p3m_init_a_ai_cao_cut(); 00314 00315 #ifdef P3M_STORE_CA_FRAC 00316 /* initialize ca fields to size CA_INCREMENT: p3m.ca_frac and p3m.ca_fmp */ 00317 p3m.ca_num = 0; 00318 p3m_realloc_ca_fields(CA_INCREMENT); 00319 #endif 00320 00321 p3m_calc_local_ca_mesh(); 00322 00323 p3m_calc_send_mesh(); 00324 P3M_TRACE(p3m_p3m_print_local_mesh(p3m.local_mesh)); 00325 P3M_TRACE(p3m_p3m_print_send_mesh(p3m.sm)); 00326 p3m.send_grid = (double *) realloc(p3m.send_grid, sizeof(double)*p3m.sm.max); 00327 p3m.recv_grid = (double *) realloc(p3m.recv_grid, sizeof(double)*p3m.sm.max); 00328 00329 /* fix box length dependent constants */ 00330 p3m_scaleby_box_l(); 00331 00332 if (p3m.params.inter > 0) 00333 p3m_interpolate_charge_assignment_function(); 00334 00335 /* position offset for calc. of first meshpoint */ 00336 p3m.pos_shift = (double)((p3m.params.cao-1)/2) - (p3m.params.cao%2)/2.0; 00337 P3M_TRACE(fprintf(stderr,"%d: p3m.pos_shift = %f\n",this_node,p3m.pos_shift)); 00338 00339 /* FFT */ 00340 P3M_TRACE(fprintf(stderr,"%d: p3m.rs_mesh ADR=%p\n",this_node,p3m.rs_mesh)); 00341 00342 int ca_mesh_size = fft_init(&p3m.rs_mesh, 00343 p3m.local_mesh.dim,p3m.local_mesh.margin, 00344 p3m.params.mesh, p3m.params.mesh_off, 00345 &p3m.ks_pnum); 00346 p3m.ks_mesh = (double *) realloc(p3m.ks_mesh, ca_mesh_size*sizeof(double)); 00347 00348 00349 P3M_TRACE(fprintf(stderr,"%d: p3m.rs_mesh ADR=%p\n",this_node,p3m.rs_mesh)); 00350 00351 /* k-space part: */ 00352 p3m_calc_differential_operator(); 00353 p3m_calc_influence_function_force(); 00354 p3m_calc_influence_function_energy(); 00355 00356 p3m_count_charged_particles(); 00357 00358 P3M_TRACE(fprintf(stderr,"%d: p3m-charges initialized\n",this_node)); 00359 } 00360 } 00361 00362 void p3m_set_tune_params(double r_cut, int mesh, int cao, 00363 double alpha, double accuracy, int n_interpol) 00364 { 00365 if (r_cut >= 0) { 00366 p3m.params.r_cut = r_cut; 00367 p3m.params.r_cut_iL = r_cut*box_l_i[0]; 00368 } 00369 00370 if (mesh >= 0) 00371 p3m.params.mesh[2] = p3m.params.mesh[1] = p3m.params.mesh[0] = mesh; 00372 00373 if (cao >= 0) 00374 p3m.params.cao = cao; 00375 00376 if (alpha >= 0) { 00377 p3m.params.alpha = alpha; 00378 p3m.params.alpha_L = alpha*box_l[0]; 00379 } 00380 00381 if (accuracy >= 0) 00382 p3m.params.accuracy = accuracy; 00383 00384 if (n_interpol != -1) 00385 p3m.params.inter = n_interpol; 00386 00387 coulomb.prefactor = (temperature > 0) ? temperature*coulomb.bjerrum : coulomb.bjerrum; 00388 } 00389 00390 /*@}*/ 00391 00392 int p3m_set_params(double r_cut, int *mesh, int cao, 00393 double alpha, double accuracy) 00394 { 00395 if (coulomb.method != COULOMB_P3M && coulomb.method != COULOMB_ELC_P3M) 00396 coulomb.method = COULOMB_P3M; 00397 00398 if(r_cut < 0) 00399 return -1; 00400 00401 if((mesh[0] < 0) || (mesh[1] < 0) || (mesh[2] < 0)) 00402 return -2; 00403 00404 if(cao < 1 || cao > 7 || cao > mesh[0] || cao > mesh[1] || cao > mesh[2] ) 00405 return -3; 00406 00407 p3m.params.r_cut = r_cut; 00408 p3m.params.r_cut_iL = r_cut*box_l_i[0]; 00409 p3m.params.mesh[2] = mesh[2]; 00410 p3m.params.mesh[1] = mesh[1]; 00411 p3m.params.mesh[0] = mesh[0]; 00412 p3m.params.cao = cao; 00413 00414 if (alpha > 0) { 00415 p3m.params.alpha = alpha; 00416 p3m.params.alpha_L = alpha*box_l[0]; 00417 } 00418 else 00419 if (alpha != -1.0) 00420 return -4; 00421 00422 if (accuracy >= 0) 00423 p3m.params.accuracy = accuracy; 00424 else 00425 if (accuracy != -1.0) 00426 return -5; 00427 00428 mpi_bcast_coulomb_params(); 00429 00430 return 0; 00431 } 00432 00433 00434 int p3m_set_mesh_offset(double x, double y, double z) 00435 { 00436 if(x < 0.0 || x > 1.0 || 00437 y < 0.0 || y > 1.0 || 00438 z < 0.0 || z > 1.0 ) 00439 return ES_ERROR; 00440 00441 p3m.params.mesh_off[0] = x; 00442 p3m.params.mesh_off[1] = y; 00443 p3m.params.mesh_off[2] = z; 00444 00445 mpi_bcast_coulomb_params(); 00446 00447 return ES_OK; 00448 } 00449 00450 00451 00452 int p3m_set_eps(double eps) 00453 { 00454 p3m.params.epsilon = eps; 00455 00456 mpi_bcast_coulomb_params(); 00457 00458 return ES_OK; 00459 } 00460 00461 00462 00463 int p3m_set_ninterpol(int n) 00464 { 00465 if (n < 0) 00466 return ES_ERROR; 00467 00468 p3m.params.inter = n; 00469 00470 mpi_bcast_coulomb_params(); 00471 00472 return ES_OK; 00473 } 00474 00475 00476 /************************************* method ********************************/ 00477 /*****************************************************************************/ 00478 00479 void p3m_interpolate_charge_assignment_function() 00480 { 00481 double dInterpol = 0.5 / (double)p3m.params.inter; 00482 int i; 00483 long j; 00484 00485 if (p3m.params.inter == 0) return; 00486 00487 P3M_TRACE(fprintf(stderr,"%d - interpolating (%d) the order-%d charge assignment function\n", 00488 this_node,p3m.params.inter,p3m.params.cao)); 00489 00490 p3m.params.inter2 = 2*p3m.params.inter + 1; 00491 00492 for (i=0; i < p3m.params.cao; i++) { 00493 /* allocate memory for interpolation array */ 00494 p3m.int_caf[i] = (double *) realloc(p3m.int_caf[i], sizeof(double)*(2*p3m.params.inter+1)); 00495 00496 /* loop over all interpolation points */ 00497 for (j=-p3m.params.inter; j<=p3m.params.inter; j++) 00498 p3m.int_caf[i][j+p3m.params.inter] = p3m_caf(i, j*dInterpol,p3m.params.cao); 00499 } 00500 00501 } 00502 00503 /* assign the charges */ 00504 void p3m_charge_assign() 00505 { 00506 Cell *cell; 00507 Particle *p; 00508 int i,c,np; 00509 /* charged particle counter, charge fraction counter */ 00510 int cp_cnt=0; 00511 /* prepare local FFT mesh */ 00512 for(i=0; i<p3m.local_mesh.size; i++) p3m.rs_mesh[i] = 0.0; 00513 00514 for (c = 0; c < local_cells.n; c++) { 00515 cell = local_cells.cell[c]; 00516 p = cell->part; 00517 np = cell->n; 00518 for(i = 0; i < np; i++) { 00519 if( p[i].p.q != 0.0 00520 ) { 00521 p3m_assign_charge(p[i].p.q, p[i].r.p, cp_cnt); 00522 cp_cnt++; 00523 } 00524 } 00525 } 00526 #ifdef P3M_STORE_CA_FRAC 00527 p3m_shrink_wrap_charge_grid(cp_cnt); 00528 #endif 00529 } 00530 00531 void p3m_assign_charge(double q, 00532 double real_pos[3], 00533 int cp_cnt) 00534 { 00535 int d, i0, i1, i2; 00536 double tmp0, tmp1; 00537 /* position of a particle in local mesh units */ 00538 double pos; 00539 /* 1d-index of nearest mesh point */ 00540 int nmp; 00541 /* distance to nearest mesh point */ 00542 double dist[3]; 00543 /* index for caf interpolation grid */ 00544 int arg[3]; 00545 /* index, index jumps for rs_mesh array */ 00546 int q_ind = 0; 00547 double cur_ca_frac_val; 00548 #ifdef P3M_STORE_CA_FRAC 00549 double *cur_ca_frac; 00550 00551 // make sure we have enough space 00552 if (cp_cnt >= p3m.ca_num) p3m_realloc_ca_fields(cp_cnt + 1); 00553 // do it here, since p3m_realloc_ca_fields may change the address of p3m.ca_frac 00554 cur_ca_frac = p3m.ca_frac + p3m.params.cao3*cp_cnt; 00555 #endif 00556 00557 for(d=0;d<3;d++) { 00558 /* particle position in mesh coordinates */ 00559 pos = ((real_pos[d]-p3m.local_mesh.ld_pos[d])*p3m.params.ai[d]) - p3m.pos_shift; 00560 /* nearest mesh point */ 00561 nmp = (int)pos; 00562 /* 3d-array index of nearest mesh point */ 00563 q_ind = (d == 0) ? nmp : nmp + p3m.local_mesh.dim[d]*q_ind; 00564 00565 if (p3m.params.inter == 0) 00566 /* distance to nearest mesh point */ 00567 dist[d] = (pos-nmp)-0.5; 00568 else 00569 /* distance to nearest mesh point for interpolation */ 00570 arg[d] = (int) ((pos - nmp)*p3m.params.inter2); 00571 00572 #ifdef ADDITIONAL_CHECKS 00573 if( pos < -skin*p3m.params.ai[d] ) { 00574 fprintf(stderr,"%d: rs_mesh underflow! (pos %f)\n", this_node, real_pos[d]); 00575 fprintf(stderr,"%d: allowed coordinates: %f - %f\n", 00576 this_node,my_left[d] - skin, my_right[d] + skin); 00577 } 00578 if( (nmp + p3m.params.cao) > p3m.local_mesh.dim[d] ) { 00579 fprintf(stderr,"%d: rs_mesh overflow! (pos %f, nmp=%d)\n", this_node, real_pos[d],nmp); 00580 fprintf(stderr,"%d: allowed coordinates: %f - %f\n", 00581 this_node, my_left[d] - skin, my_right[d] + skin); 00582 } 00583 #endif 00584 } 00585 00586 #ifdef P3M_STORE_CA_FRAC 00587 if (cp_cnt >= 0) p3m.ca_fmp[cp_cnt] = q_ind; 00588 #endif 00589 00590 if (p3m.params.inter == 0) { 00591 for(i0=0; i0<p3m.params.cao; i0++) { 00592 tmp0 = p3m_caf(i0, dist[0],p3m.params.cao); 00593 for(i1=0; i1<p3m.params.cao; i1++) { 00594 tmp1 = tmp0 * p3m_caf(i1, dist[1],p3m.params.cao); 00595 for(i2=0; i2<p3m.params.cao; i2++) { 00596 cur_ca_frac_val = q * tmp1 * p3m_caf(i2, dist[2], p3m.params.cao); 00597 p3m.rs_mesh[q_ind] += cur_ca_frac_val; 00598 #ifdef P3M_STORE_CA_FRAC 00599 /* store current ca frac */ 00600 if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val; 00601 #endif 00602 q_ind++; 00603 } 00604 q_ind += p3m.local_mesh.q_2_off; 00605 } 00606 q_ind += p3m.local_mesh.q_21_off; 00607 } 00608 } else { 00609 for(i0=0; i0<p3m.params.cao; i0++) { 00610 tmp0 = p3m.int_caf[i0][arg[0]]; 00611 for(i1=0; i1<p3m.params.cao; i1++) { 00612 tmp1 = tmp0 * p3m.int_caf[i1][arg[1]]; 00613 for(i2=0; i2<p3m.params.cao; i2++) { 00614 cur_ca_frac_val = q * tmp1 * p3m.int_caf[i2][arg[2]]; 00615 p3m.rs_mesh[q_ind] += cur_ca_frac_val; 00616 #ifdef P3M_STORE_CA_FRAC 00617 /* store current ca frac */ 00618 if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val; 00619 #endif 00620 q_ind++; 00621 } 00622 q_ind += p3m.local_mesh.q_2_off; 00623 } 00624 q_ind += p3m.local_mesh.q_21_off; 00625 } 00626 } 00627 } 00628 00629 #ifdef P3M_STORE_CA_FRAC 00630 /** shrink wrap the charge grid */ 00631 void p3m_shrink_wrap_charge_grid(int n_charges) { 00632 /* we do not really want to export these */ 00633 if( n_charges < p3m.ca_num ) p3m_realloc_ca_fields(n_charges); 00634 } 00635 #endif 00636 00637 /* assign the forces obtained from k-space */ 00638 static void P3M_assign_forces(double force_prefac, int d_rs) 00639 { 00640 Cell *cell; 00641 Particle *p; 00642 int i,c,np,i0,i1,i2; 00643 double q; 00644 #ifdef ONEPART_DEBUG 00645 double db_fsum=0.0; /* TODO: db_fsum was missing and code couldn't compile. Now it has the arbitrary value of 0, fix it. */ 00646 #endif 00647 /* charged particle counter, charge fraction counter */ 00648 int cp_cnt=0; 00649 #ifdef P3M_STORE_CA_FRAC 00650 int cf_cnt=0; 00651 #else 00652 /* distance to nearest mesh point */ 00653 double dist[3]; 00654 /* index for caf interpolation grid */ 00655 int arg[3]; 00656 #endif 00657 /* index, index jumps for rs_mesh array */ 00658 int q_ind = 0; 00659 00660 for (c = 0; c < local_cells.n; c++) { 00661 cell = local_cells.cell[c]; 00662 p = cell->part; 00663 np = cell->n; 00664 for(i=0; i<np; i++) { 00665 if( (q=p[i].p.q) != 0.0 ) { 00666 #ifdef P3M_STORE_CA_FRAC 00667 q_ind = p3m.ca_fmp[cp_cnt]; 00668 for(i0=0; i0<p3m.params.cao; i0++) { 00669 for(i1=0; i1<p3m.params.cao; i1++) { 00670 for(i2=0; i2<p3m.params.cao; i2++) { 00671 p[i].f.f[d_rs] -= force_prefac*p3m.ca_frac[cf_cnt]*p3m.rs_mesh[q_ind]; 00672 q_ind++; 00673 cf_cnt++; 00674 } 00675 q_ind += p3m.local_mesh.q_2_off; 00676 } 00677 q_ind += p3m.local_mesh.q_21_off; 00678 } 00679 cp_cnt++; 00680 #else 00681 double pos; 00682 int nmp; 00683 double tmp0,tmp1; 00684 double cur_ca_frac_val; 00685 for(int d=0;d<3;d++) { 00686 /* particle position in mesh coordinates */ 00687 pos = ((p[i].r.p[d]-p3m.local_mesh.ld_pos[d])*p3m.params.ai[d]) - p3m.pos_shift; 00688 /* nearest mesh point */ 00689 nmp = (int)pos; 00690 /* 3d-array index of nearest mesh point */ 00691 q_ind = (d == 0) ? nmp : nmp + p3m.local_mesh.dim[d]*q_ind; 00692 00693 if (p3m.params.inter == 0) 00694 /* distance to nearest mesh point */ 00695 dist[d] = (pos-nmp)-0.5; 00696 else 00697 /* distance to nearest mesh point for interpolation */ 00698 arg[d] = (int)((pos-nmp)*p3m.params.inter2); 00699 } 00700 00701 if (p3m.params.inter == 0) { 00702 for(i0=0; i0<p3m.params.cao; i0++) { 00703 tmp0 = p3m_caf(i0, dist[0],p3m.params.cao); 00704 for(i1=0; i1<p3m.params.cao; i1++) { 00705 tmp1 = tmp0 * p3m_caf(i1, dist[1],p3m.params.cao); 00706 for(i2=0; i2<p3m.params.cao; i2++) { 00707 cur_ca_frac_val = q * tmp1 * p3m_caf(i2, dist[2], p3m.params.cao); 00708 p[i].f.f[d_rs] -= force_prefac*cur_ca_frac_val*p3m.rs_mesh[q_ind]; 00709 q_ind++; 00710 } 00711 q_ind += p3m.local_mesh.q_2_off; 00712 } 00713 q_ind += p3m.local_mesh.q_21_off; 00714 } 00715 } else { 00716 for(i0=0; i0<p3m.params.cao; i0++) { 00717 tmp0 = p3m.int_caf[i0][arg[0]]; 00718 for(i1=0; i1<p3m.params.cao; i1++) { 00719 tmp1 = tmp0 * p3m.int_caf[i1][arg[1]]; 00720 for(i2=0; i2<p3m.params.cao; i2++) { 00721 cur_ca_frac_val = q * tmp1 * p3m.int_caf[i2][arg[2]]; 00722 p[i].f.f[d_rs] -= force_prefac*cur_ca_frac_val*p3m.rs_mesh[q_ind]; 00723 q_ind++; 00724 } 00725 q_ind += p3m.local_mesh.q_2_off; 00726 } 00727 q_ind += p3m.local_mesh.q_21_off; 00728 } 00729 } 00730 #endif 00731 00732 ONEPART_TRACE(if(p[i].p.identity==check_id) fprintf(stderr,"%d: OPT: P3M f = (%.3e,%.3e,%.3e) in dir %d add %.5f\n",this_node,p[i].f.f[0],p[i].f.f[1],p[i].f.f[2],d_rs,-db_fsum)); 00733 } 00734 } 00735 } 00736 } 00737 00738 00739 00740 double p3m_calc_kspace_forces(int force_flag, int energy_flag) 00741 { 00742 int i,d,d_rs,ind,j[3]; 00743 /**************************************************************/ 00744 /* Prefactor for force */ 00745 double force_prefac; 00746 /* k space energy */ 00747 double k_space_energy=0.0, node_k_space_energy=0.0; 00748 /* directions */ 00749 double *d_operator = NULL; 00750 00751 P3M_TRACE(fprintf(stderr,"%d: p3m_perform: \n",this_node)); 00752 // fprintf(stderr, "calculating kspace forces\n"); 00753 00754 force_prefac = coulomb.prefactor / ( 2 * box_l[0] * box_l[1] * box_l[2] ); 00755 00756 /* Gather information for FFT grid inside the nodes domain (inner local mesh) */ 00757 /* and Perform forward 3D FFT (Charge Assignment Mesh). */ 00758 if (p3m.sum_q2 > 0) { 00759 p3m_gather_fft_grid(p3m.rs_mesh); 00760 fft_perform_forw(p3m.rs_mesh); 00761 } 00762 //Note: after these calls, the grids are in the order yzx and not xyz anymore!!! 00763 00764 /* === K Space Calculations === */ 00765 P3M_TRACE(fprintf(stderr,"%d: p3m_perform: k-Space\n",this_node)); 00766 00767 /* === K Space Energy Calculation === */ 00768 // if(energy_flag && p3m.sum_q2 > 0) { 00769 if (energy_flag) { 00770 /********************* 00771 Coulomb energy 00772 **********************/ 00773 00774 00775 for(i=0;i<fft.plan[3].new_size;i++) { 00776 // Use the energy optimized influence function for energy! 00777 node_k_space_energy += p3m.g_energy[i] * ( SQR(p3m.rs_mesh[2*i]) + SQR(p3m.rs_mesh[2*i+1]) ); 00778 } 00779 node_k_space_energy *= force_prefac; 00780 00781 MPI_Reduce(&node_k_space_energy, &k_space_energy, 1, MPI_DOUBLE, MPI_SUM, 0, comm_cart); 00782 if(this_node==0) { 00783 /* self energy correction */ 00784 k_space_energy -= coulomb.prefactor*(p3m.sum_q2 * p3m.params.alpha * wupii); 00785 /* net charge correction */ 00786 k_space_energy -= coulomb.prefactor* p3m.square_sum_q * PI / (2.0*box_l[0]*box_l[1]*box_l[2]*SQR(p3m.params.alpha)); 00787 } 00788 00789 } /* if (energy_flag) */ 00790 00791 /* === K Space Force Calculation === */ 00792 if(force_flag && p3m.sum_q2 > 0) { 00793 /*************************** 00794 COULOMB FORCES (k-space) 00795 ****************************/ 00796 /* Force preparation */ 00797 ind = 0; 00798 /* apply the influence function */ 00799 for(i=0; i<fft.plan[3].new_size; i++) { 00800 p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; ind++; 00801 p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; ind++; 00802 } 00803 00804 /* === 3 Fold backward 3D FFT (Force Component Meshs) === */ 00805 00806 /* Force component loop */ 00807 for(d=0;d<3;d++) { 00808 if (d == KX) 00809 d_operator = p3m.d_op[RX]; 00810 else if (d == KY) 00811 d_operator = p3m.d_op[RY]; 00812 else if (d == KZ) 00813 d_operator = p3m.d_op[RZ]; 00814 00815 /* direction in k space: */ 00816 d_rs = (d+p3m.ks_pnum)%3; 00817 /* srqt(-1)*k differentiation */ 00818 ind=0; 00819 for(j[0]=0; j[0]<fft.plan[3].new_mesh[0]; j[0]++) { 00820 for(j[1]=0; j[1]<fft.plan[3].new_mesh[1]; j[1]++) { 00821 for(j[2]=0; j[2]<fft.plan[3].new_mesh[2]; j[2]++) { 00822 /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */ 00823 p3m.rs_mesh[ind] = -2.0*PI*(p3m.ks_mesh[ind+1] * d_operator[ j[d]+fft.plan[3].start[d] ])/box_l[d_rs]; 00824 ind++; 00825 p3m.rs_mesh[ind] = 2.0*PI*p3m.ks_mesh[ind-1] * d_operator[ j[d]+fft.plan[3].start[d] ]/box_l[d_rs]; 00826 ind++; 00827 } 00828 } 00829 } 00830 fft_perform_back(p3m.rs_mesh); /* Back FFT force component mesh */ 00831 p3m_spread_force_grid(p3m.rs_mesh); /* redistribute force component mesh */ 00832 P3M_assign_forces(force_prefac, d_rs); /* Assign force component from mesh to particle */ 00833 } 00834 } /* if(force_flag) */ 00835 00836 if (p3m.params.epsilon != P3M_EPSILON_METALLIC) { 00837 k_space_energy += p3m_calc_dipole_term(force_flag, energy_flag); 00838 } 00839 00840 return k_space_energy; 00841 } 00842 00843 00844 /************************************************************/ 00845 00846 double p3m_calc_dipole_term(int force_flag, int energy_flag) 00847 { 00848 int np, c, i, j; 00849 Particle *part; 00850 double pref = coulomb.prefactor*4*M_PI*box_l_i[0]*box_l_i[1]*box_l_i[2]/(2*p3m.params.epsilon + 1); 00851 double lcl_dm[3], gbl_dm[3]; 00852 double en; 00853 00854 for (j = 0; j < 3; j++) 00855 lcl_dm[j] = 0; 00856 00857 for (c = 0; c < local_cells.n; c++) { 00858 np = local_cells.cell[c]->n; 00859 part = local_cells.cell[c]->part; 00860 for (i = 0; i < np; i++) { 00861 for (j = 0; j < 3; j++) 00862 /* dipole moment with unfolded coordinates */ 00863 lcl_dm[j] += part[i].p.q*(part[i].r.p[j] + part[i].l.i[j]*box_l[j]); 00864 } 00865 } 00866 00867 MPI_Allreduce(lcl_dm, gbl_dm, 3, MPI_DOUBLE, MPI_SUM, comm_cart); 00868 00869 if (energy_flag) 00870 en = 0.5*pref*(SQR(gbl_dm[0]) + SQR(gbl_dm[1]) + SQR(gbl_dm[2])); 00871 else 00872 en = 0; 00873 if (force_flag) { 00874 for (j = 0; j < 3; j++) 00875 gbl_dm[j] *= pref; 00876 for (c = 0; c < local_cells.n; c++) { 00877 np = local_cells.cell[c]->n; 00878 part = local_cells.cell[c]->part; 00879 for (i = 0; i < np; i++) 00880 for (j = 0; j < 3; j++) 00881 part[i].f.f[j] -= gbl_dm[j]*part[i].p.q; 00882 } 00883 } 00884 return en; 00885 } 00886 00887 /************************************************************/ 00888 00889 void p3m_gather_fft_grid(double* themesh) 00890 { 00891 int s_dir,r_dir,evenodd; 00892 MPI_Status status; 00893 double *tmp_ptr; 00894 00895 P3M_TRACE(fprintf(stderr,"%d: p3m_gather_fft_grid:\n",this_node)); 00896 00897 /* direction loop */ 00898 for(s_dir=0; s_dir<6; s_dir++) { 00899 if(s_dir%2==0) r_dir = s_dir+1; 00900 else r_dir = s_dir-1; 00901 /* pack send block */ 00902 if(p3m.sm.s_size[s_dir]>0) 00903 fft_pack_block(themesh, p3m.send_grid, p3m.sm.s_ld[s_dir], p3m.sm.s_dim[s_dir], p3m.local_mesh.dim, 1); 00904 00905 /* communication */ 00906 if(node_neighbors[s_dir] != this_node) { 00907 for(evenodd=0; evenodd<2;evenodd++) { 00908 if((node_pos[s_dir/2]+evenodd)%2==0) { 00909 if(p3m.sm.s_size[s_dir]>0) 00910 MPI_Send(p3m.send_grid, p3m.sm.s_size[s_dir], MPI_DOUBLE, 00911 node_neighbors[s_dir], REQ_P3M_GATHER, comm_cart); 00912 } 00913 else { 00914 if(p3m.sm.r_size[r_dir]>0) 00915 MPI_Recv(p3m.recv_grid, p3m.sm.r_size[r_dir], MPI_DOUBLE, 00916 node_neighbors[r_dir], REQ_P3M_GATHER, comm_cart, &status); 00917 } 00918 } 00919 } 00920 else { 00921 tmp_ptr = p3m.recv_grid; 00922 p3m.recv_grid = p3m.send_grid; 00923 p3m.send_grid = tmp_ptr; 00924 } 00925 /* add recv block */ 00926 if(p3m.sm.r_size[r_dir]>0) { 00927 p3m_add_block(p3m.recv_grid, themesh, p3m.sm.r_ld[r_dir], p3m.sm.r_dim[r_dir], p3m.local_mesh.dim); 00928 } 00929 } 00930 } 00931 00932 00933 void p3m_spread_force_grid(double* themesh) 00934 { 00935 int s_dir,r_dir,evenodd; 00936 MPI_Status status; 00937 double *tmp_ptr; 00938 P3M_TRACE(fprintf(stderr,"%d: p3m_spread_force_grid:\n",this_node)); 00939 00940 /* direction loop */ 00941 for(s_dir=5; s_dir>=0; s_dir--) { 00942 if(s_dir%2==0) r_dir = s_dir+1; 00943 else r_dir = s_dir-1; 00944 /* pack send block */ 00945 if(p3m.sm.s_size[s_dir]>0) 00946 fft_pack_block(themesh, p3m.send_grid, p3m.sm.r_ld[r_dir], p3m.sm.r_dim[r_dir], p3m.local_mesh.dim, 1); 00947 /* communication */ 00948 if(node_neighbors[r_dir] != this_node) { 00949 for(evenodd=0; evenodd<2;evenodd++) { 00950 if((node_pos[r_dir/2]+evenodd)%2==0) { 00951 if(p3m.sm.r_size[r_dir]>0) 00952 MPI_Send(p3m.send_grid, p3m.sm.r_size[r_dir], MPI_DOUBLE, 00953 node_neighbors[r_dir], REQ_P3M_SPREAD, comm_cart); 00954 } 00955 else { 00956 if(p3m.sm.s_size[s_dir]>0) 00957 MPI_Recv(p3m.recv_grid, p3m.sm.s_size[s_dir], MPI_DOUBLE, 00958 node_neighbors[s_dir], REQ_P3M_SPREAD, comm_cart, &status); 00959 } 00960 } 00961 } 00962 else { 00963 tmp_ptr = p3m.recv_grid; 00964 p3m.recv_grid = p3m.send_grid; 00965 p3m.send_grid = tmp_ptr; 00966 } 00967 /* un pack recv block */ 00968 if(p3m.sm.s_size[s_dir]>0) { 00969 fft_unpack_block(p3m.recv_grid, themesh, p3m.sm.s_ld[s_dir], p3m.sm.s_dim[s_dir], p3m.local_mesh.dim, 1); 00970 } 00971 } 00972 } 00973 00974 #ifdef P3M_STORE_CA_FRAC 00975 void p3m_realloc_ca_fields(int newsize) 00976 { 00977 newsize = ((newsize + CA_INCREMENT - 1)/CA_INCREMENT)*CA_INCREMENT; 00978 if (newsize == p3m.ca_num) return; 00979 if (newsize < CA_INCREMENT) newsize = CA_INCREMENT; 00980 00981 P3M_TRACE(fprintf(stderr,"%d: p3m_realloc_ca_fields: old_size=%d -> new_size=%d\n",this_node,p3m.ca_num,newsize)); 00982 p3m.ca_num = newsize; 00983 p3m.ca_frac = (double *)realloc(p3m.ca_frac, p3m.params.cao3*p3m.ca_num*sizeof(double)); 00984 p3m.ca_fmp = (int *)realloc(p3m.ca_fmp, p3m.ca_num*sizeof(int)); 00985 00986 } 00987 #endif 00988 00989 void p3m_calc_meshift(void) 00990 { 00991 int i; 00992 00993 p3m.meshift_x = (double *) realloc(p3m.meshift_x, p3m.params.mesh[0]*sizeof(double)); 00994 p3m.meshift_y = (double *) realloc(p3m.meshift_y, p3m.params.mesh[1]*sizeof(double)); 00995 p3m.meshift_z = (double *) realloc(p3m.meshift_z, p3m.params.mesh[2]*sizeof(double)); 00996 00997 p3m.meshift_x[0] = p3m.meshift_y[0] = p3m.meshift_z[0] = 0; 00998 for (i = 1; i <= p3m.params.mesh[RX]/2; i++) { 00999 p3m.meshift_x[i] = i; 01000 p3m.meshift_x[p3m.params.mesh[0] - i] = -i; 01001 } 01002 01003 for (i = 1; i <= p3m.params.mesh[RY]/2; i++) { 01004 p3m.meshift_y[i] = i; 01005 p3m.meshift_y[p3m.params.mesh[1] - i] = -i; 01006 } 01007 01008 for (i = 1; i <= p3m.params.mesh[RZ]/2; i++) { 01009 p3m.meshift_z[i] = i; 01010 p3m.meshift_z[p3m.params.mesh[2] - i] = -i; 01011 } 01012 01013 } 01014 01015 01016 01017 void p3m_calc_differential_operator() 01018 { 01019 int i,j; 01020 01021 for(i=0;i<3;i++) { 01022 p3m.d_op[i] = realloc(p3m.d_op[i], p3m.params.mesh[i]*sizeof(double)); 01023 p3m.d_op[i][0] = 0; 01024 p3m.d_op[i][p3m.params.mesh[i]/2] = 0.0; 01025 01026 for(j = 1; j < p3m.params.mesh[i]/2; j++) { 01027 p3m.d_op[i][j] = j; 01028 p3m.d_op[i][p3m.params.mesh[i] - j] = -j; 01029 } 01030 } 01031 } 01032 01033 void p3m_calc_influence_function_force() 01034 { 01035 int i, n[3], ind; 01036 int end[3]; 01037 int size = 1; 01038 double fak1,fak2,fak3; 01039 double nominator[3] = {0.0, 0.0, 0.0}, denominator = 0.0; 01040 01041 p3m_calc_meshift(); 01042 01043 for(i=0;i<3;i++) { 01044 size *= fft.plan[3].new_mesh[i]; 01045 end[i] = fft.plan[3].start[i] + fft.plan[3].new_mesh[i]; 01046 } 01047 p3m.g_force = (double *) realloc(p3m.g_force, size*sizeof(double)); 01048 01049 for(n[0]=fft.plan[3].start[0]; n[0]<end[0]; n[0]++) { 01050 for(n[1]=fft.plan[3].start[1]; n[1]<end[1]; n[1]++) { 01051 for(n[2]=fft.plan[3].start[2]; n[2]<end[2]; n[2]++) { 01052 ind = (n[2]-fft.plan[3].start[2]) 01053 + fft.plan[3].new_mesh[2] * ((n[1]-fft.plan[3].start[1]) 01054 + (fft.plan[3].new_mesh[1]*(n[0]-fft.plan[3].start[0]))); 01055 01056 if( (n[KX]%(p3m.params.mesh[RX]/2)==0) && (n[KY]%(p3m.params.mesh[RY]/2)==0) && (n[KZ]%(p3m.params.mesh[RZ]/2)==0) ) { 01057 p3m.g_force[ind] = 0.0; 01058 } 01059 else { 01060 denominator = p3m_perform_aliasing_sums_force(n,nominator); 01061 01062 fak1 = p3m.d_op[RX][n[KX]]*nominator[RX]/box_l[RX] + p3m.d_op[RY][n[KY]]*nominator[RY]/box_l[RY] + p3m.d_op[RZ][n[KZ]]*nominator[RZ]/box_l[RZ]; 01063 fak2 = SQR(p3m.d_op[RX][n[KX]]/box_l[RX])+SQR(p3m.d_op[RY][n[KY]]/box_l[RY])+SQR(p3m.d_op[RZ][n[KZ]]/box_l[RZ]); 01064 01065 fak3 = fak1/(fak2 * SQR(denominator)); 01066 p3m.g_force[ind] = 2*fak3/(PI); 01067 } 01068 } 01069 } 01070 } 01071 } 01072 01073 01074 double p3m_perform_aliasing_sums_force(int n[3], double numerator[3]) 01075 { 01076 int i; 01077 double denominator = 0.0; 01078 /* lots of temporary variables... */ 01079 double sx, sy, sz, f1, f2, mx, my, mz, nmx, nmy, nmz, nm2, expo; 01080 double limit = 30; 01081 01082 for(i = 0; i < 3; i++) 01083 numerator[i] = 0.0; 01084 01085 f1 = SQR(PI/(p3m.params.alpha)); 01086 01087 for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) { 01088 nmx = p3m.meshift_x[n[KX]] + p3m.params.mesh[RX]*mx; 01089 sx = pow(sinc(nmx/(double)p3m.params.mesh[RX]),2.0*p3m.params.cao); 01090 for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) { 01091 nmy = p3m.meshift_y[n[KY]] + p3m.params.mesh[RY]*my; 01092 sy = sx*pow(sinc(nmy/(double)p3m.params.mesh[RY]),2.0*p3m.params.cao); 01093 for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) { 01094 nmz = p3m.meshift_z[n[KZ]] + p3m.params.mesh[RZ]*mz; 01095 sz = sy*pow(sinc(nmz/(double)p3m.params.mesh[RZ]),2.0*p3m.params.cao); 01096 01097 nm2 = SQR(nmx/box_l[RX]) + SQR(nmy/box_l[RY]) + SQR(nmz/box_l[RZ]); 01098 expo = f1*nm2; 01099 f2 = (expo<limit) ? sz*exp(-expo)/nm2 : 0.0; 01100 01101 numerator[RX] += f2*nmx/box_l[RX]; 01102 numerator[RY] += f2*nmy/box_l[RY]; 01103 numerator[RZ] += f2*nmz/box_l[RZ]; 01104 01105 denominator += sz; 01106 } 01107 } 01108 } 01109 return denominator; 01110 } 01111 01112 void p3m_calc_influence_function_energy() 01113 { 01114 int i,n[3],ind; 01115 int end[3]; int start[3]; 01116 int size=1; 01117 01118 p3m_calc_meshift(); 01119 01120 for(i = 0; i < 3; i++) { 01121 size *= fft.plan[3].new_mesh[i]; 01122 end[i] = fft.plan[3].start[i] + fft.plan[3].new_mesh[i]; 01123 start[i] = fft.plan[3].start[i]; 01124 } 01125 01126 p3m.g_energy = (double *) realloc(p3m.g_energy, size*sizeof(double)); 01127 ind = 0; 01128 01129 01130 01131 for(n[0]=start[0]; n[0]<end[0]; n[0]++) { 01132 for(n[1]=start[1]; n[1]<end[1]; n[1]++) { 01133 for(n[2]=start[2]; n[2]<end[2]; n[2]++) { 01134 ind = (n[2]-start[2]) 01135 + fft.plan[3].new_mesh[2] * (n[1]-start[1]) 01136 + fft.plan[3].new_mesh[2] * fft.plan[3].new_mesh[1]*(n[0]-start[0]); 01137 if( (n[KX]%(p3m.params.mesh[RX]/2)==0) && (n[KY]%(p3m.params.mesh[RY]/2)==0) && (n[KZ]%(p3m.params.mesh[RZ]/2)==0) ) { 01138 p3m.g_energy[ind] = 0.0; 01139 } 01140 01141 else 01142 p3m.g_energy[ind] = p3m_perform_aliasing_sums_energy(n)/PI; 01143 } 01144 } 01145 } 01146 } 01147 01148 double p3m_perform_aliasing_sums_energy(int n[3]) 01149 { 01150 double numerator=0.0, denominator=0.0; 01151 /* lots of temporary variables... */ 01152 double sx, sy, sz, f1, f2, mx, my, mz, nmx, nmy, nmz, nm2, expo; 01153 double limit = 30; 01154 01155 f1 = SQR(PI/(p3m.params.alpha)); 01156 01157 for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) { 01158 nmx = p3m.meshift_x[n[KX]] + p3m.params.mesh[RX]*mx; 01159 sx = pow(sinc(nmx/(double)p3m.params.mesh[RX]),2.0*p3m.params.cao); 01160 for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) { 01161 nmy = p3m.meshift_y[n[KY]] + p3m.params.mesh[RY]*my; 01162 sy = sx*pow(sinc(nmy/(double)p3m.params.mesh[RY]),2.0*p3m.params.cao); 01163 for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) { 01164 nmz = p3m.meshift_z[n[KZ]] + p3m.params.mesh[RZ]*mz; 01165 sz = sy*pow(sinc(nmz/(double)p3m.params.mesh[RZ]),2.0*p3m.params.cao); 01166 /* k = 2*pi * (nx/lx, ny/ly, nz/lz); expo = -k^2 / 4*alpha^2 */ 01167 nm2 = SQR(nmx/box_l[RX]) + SQR(nmy/box_l[RY]) + SQR(nmz/box_l[RZ]); 01168 expo = f1*nm2; 01169 f2 = (expo<limit) ? sz*exp(-expo)/nm2 : 0.0; 01170 01171 numerator += f2; 01172 denominator += sz; 01173 } 01174 } 01175 } 01176 01177 return numerator/SQR(denominator); 01178 } 01179 01180 01181 01182 /************************************************ 01183 * Functions for P3M Parameter tuning 01184 * This tuning is based on P3M_tune by M. Deserno 01185 ************************************************/ 01186 01187 #define P3M_TUNE_MAX_CUTS 50 01188 01189 /** get the minimal error for this combination of parameters. In fact, 01190 the real space error is tuned such that it contributes half of the 01191 total error, and then the Fourier space error is 01192 calculated. Returns the error and the optimal alpha, or 0 if this 01193 combination does not work at all */ 01194 static double p3m_get_accuracy(int mesh[3], int cao, double r_cut_iL, double *_alpha_L, double *_rs_err, double *_ks_err) 01195 { 01196 double rs_err, ks_err; 01197 double alpha_L; 01198 P3M_TRACE(fprintf(stderr, "p3m_get_accuracy: mesh (%d, %d, %d), cao %d, r_cut %f ", mesh[0], mesh[1], mesh[2], cao, r_cut_iL)); 01199 01200 /* calc maximal real space error for setting */ 01201 rs_err = p3m_real_space_error(coulomb.prefactor,r_cut_iL,p3m.sum_qpart,p3m.sum_q2,0); 01202 01203 if(M_SQRT2*rs_err > p3m.params.accuracy) { 01204 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */ 01205 alpha_L = sqrt(log(M_SQRT2*rs_err/p3m.params.accuracy)) / r_cut_iL; 01206 } 01207 else { 01208 /* even alpha=0 is ok, however, we cannot choose it since it kills the k-space error formula. 01209 Anyways, this very likely NOT the optimal solution */ 01210 alpha_L = 0.1; 01211 } 01212 01213 *_alpha_L = alpha_L; 01214 /* calculate real space and k space error for this alpha_L */ 01215 rs_err = p3m_real_space_error(coulomb.prefactor,r_cut_iL,p3m.sum_qpart,p3m.sum_q2,alpha_L); 01216 ks_err = p3m_k_space_error(coulomb.prefactor,mesh,cao,p3m.sum_qpart,p3m.sum_q2,alpha_L); 01217 01218 *_rs_err = rs_err; 01219 *_ks_err = ks_err; 01220 P3M_TRACE(fprintf(stderr, "resulting: alpha_L %g -> rs_err: %g, ks_err %g, total_err %g\n", alpha_L, rs_err, ks_err,sqrt(SQR(rs_err)+SQR(ks_err)))); 01221 return sqrt(SQR(rs_err)+SQR(ks_err)); 01222 } 01223 01224 /** get the optimal alpha and the corresponding computation time for fixed mesh, cao, r_cut and alpha */ 01225 static double p3m_mcr_time(int mesh[3], int cao, double r_cut_iL, double alpha_L) 01226 { 01227 /* rounded up 2000/n_charges timing force evaluations */ 01228 int int_num = (1999 + p3m.sum_qpart)/p3m.sum_qpart; 01229 double int_time; 01230 01231 /* broadcast p3m parameters for test run */ 01232 if (coulomb.method != COULOMB_P3M && coulomb.method != COULOMB_ELC_P3M) 01233 coulomb.method = COULOMB_P3M; 01234 p3m.params.r_cut_iL = r_cut_iL; 01235 p3m.params.mesh[0] = mesh[0]; 01236 p3m.params.mesh[1] = mesh[1]; 01237 p3m.params.mesh[2] = mesh[2]; 01238 p3m.params.cao = cao; 01239 p3m.params.alpha_L = alpha_L; 01240 p3m_scaleby_box_l(); 01241 /* initialize p3m structures */ 01242 mpi_bcast_coulomb_params(); 01243 /* perform force calculation test */ 01244 int_time = time_force_calc(int_num); 01245 P3M_TRACE(fprintf(stderr, "%d: test integration with mesh (%d %d %d), r_cut_iL %lf, cao %d, alpha_L %lf returned %lf.\n", this_node, mesh[0], mesh[1], mesh[2], r_cut_iL, cao, alpha_L, int_time)); 01246 return int_time; 01247 } 01248 01249 /** get the optimal alpha and the corresponding computation time for fixed mesh, cao. The r_cut is determined via 01250 a simple bisection. Returns -1 if the force evaluation does not work, -2 if there is no valid r_cut, and -3 if 01251 the charge assigment order is to large for this grid */ 01252 static double p3m_mc_time(char **log, int mesh[3], int cao, 01253 double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL, 01254 double *_alpha_L, double *_accuracy) 01255 { 01256 double int_time; 01257 double r_cut_iL; 01258 double rs_err, ks_err, mesh_size, k_cut; 01259 int i, n_cells; 01260 char b[5*ES_DOUBLE_SPACE + 3*ES_INTEGER_SPACE + 128]; 01261 01262 /* initial checks. */ 01263 mesh_size = box_l[0]/(double)mesh[0]; 01264 k_cut = mesh_size*cao/2.0; 01265 P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh=(%d, %d, %d), cao=%d, rmin=%f, rmax=%f\n", 01266 mesh[0],mesh[1],mesh[2], cao, r_cut_iL_min, r_cut_iL_max)); 01267 if(cao >= imin(mesh[0],imin(mesh[1],mesh[2])) || k_cut >= (dmin(min_box_l,min_local_box_l) - skin)) { 01268 sprintf(b, "%-4d %-3d cao too large for this mesh\n", mesh[0], cao); 01269 *log = strcat_alloc(*log, b); 01270 return -3; 01271 } 01272 01273 /* Either low and high boundary are equal (for fixed cut), or the low border is initially 0 and therefore 01274 has infinite error estimate, as required. Therefore if the high boundary fails, there is no possible r_cut */ 01275 if ((*_accuracy = p3m_get_accuracy(mesh, cao, r_cut_iL_max, _alpha_L, &rs_err, &ks_err)) > p3m.params.accuracy) { 01276 /* print result */ 01277 P3M_TRACE(puts("p3m_mc_time: accuracy not achieved.")); 01278 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e accuracy not achieved\n", 01279 mesh[0], cao, r_cut_iL_max, *_alpha_L, *_accuracy, rs_err, ks_err); 01280 *log = strcat_alloc(*log, b); 01281 return -2; 01282 } 01283 01284 for (;;) { 01285 P3M_TRACE(fprintf(stderr, "p3m_mc_time: interval [%f,%f]\n", r_cut_iL_min, r_cut_iL_max)); 01286 r_cut_iL = 0.5*(r_cut_iL_min + r_cut_iL_max); 01287 01288 if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC) 01289 break; 01290 01291 /* bisection */ 01292 if ((p3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err) > p3m.params.accuracy)) 01293 r_cut_iL_min = r_cut_iL; 01294 else 01295 r_cut_iL_max = r_cut_iL; 01296 } 01297 01298 /* final result is always the upper interval boundary, since only there 01299 we know that the desired minimal accuracy is obtained */ 01300 *_r_cut_iL = r_cut_iL = r_cut_iL_max; 01301 01302 /* check whether we are running P3M+ELC, and whether we leave a reasonable gap space */ 01303 if (coulomb.method == COULOMB_ELC_P3M && elc_params.gap_size <= 1.1*r_cut_iL*box_l[0]) { 01304 P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh (%d, %d, %d) cao %d r_cut %f reject r_cut %f > gap %f\n", mesh[0],mesh[1],mesh[2], cao, r_cut_iL, 01305 2*r_cut_iL*box_l[0], elc_params.gap_size)); 01306 /* print result */ 01307 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e conflict with ELC\n", 01308 mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err); 01309 *log = strcat_alloc(*log, b); 01310 return -P3M_TUNE_ELCTEST; 01311 } 01312 01313 /* check whether this radius is too large, so that we would use less cells than allowed */ 01314 n_cells = 1; 01315 for (i = 0; i < 3; i++) 01316 n_cells *= (int)(floor(local_box_l[i]/(r_cut_iL*box_l[0] + skin))); 01317 if (n_cells < min_num_cells) { 01318 P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh (%d, %d, %d) cao %d r_cut %f reject n_cells %d\n", mesh[0], mesh[1], mesh[2], cao, r_cut_iL, n_cells)); 01319 /* print result */ 01320 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e radius dangerously high\n\n", 01321 mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err); 01322 *log = strcat_alloc(*log, b); 01323 } 01324 int_time = p3m_mcr_time(mesh, cao, r_cut_iL, *_alpha_L); 01325 if (int_time == -1) { 01326 *log = strcat_alloc(*log, "tuning failed, test integration not possible\n"); 01327 return -P3M_TUNE_FAIL; 01328 } 01329 01330 *_accuracy = p3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err); 01331 01332 P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh (%d, %d, %d) cao %d r_cut %f time %f\n", mesh[0], mesh[1], mesh[2], cao, r_cut_iL, int_time)); 01333 /* print result */ 01334 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e %-8d\n", 01335 mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err, (int)int_time); 01336 *log = strcat_alloc(*log, b); 01337 return int_time; 01338 } 01339 01340 /** get the optimal alpha and the corresponding computation time for fixed mesh. *cao 01341 should contain an initial guess, which is then adapted by stepping up and down. Returns the time 01342 upon completion, -1 if the force evaluation does not work, and -2 if the accuracy cannot be met */ 01343 static double p3m_m_time(char **log, int mesh[3], 01344 int cao_min, int cao_max, int *_cao, 01345 double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL, 01346 double *_alpha_L, double *_accuracy) 01347 { 01348 double best_time = -1, tmp_time, tmp_r_cut_iL=0.0, tmp_alpha_L=0.0, tmp_accuracy=0.0; 01349 /* in which direction improvement is possible. Initially, we dont know it yet. */ 01350 int final_dir = 0; 01351 int cao = *_cao; 01352 01353 P3M_TRACE(fprintf(stderr, "p3m_m_time: mesh=(%d, %d %d), cao_min=%d, cao_max=%d, rmin=%f, rmax=%f\n", 01354 mesh[0],mesh[1],mesh[2], cao_min, cao_max, r_cut_iL_min, r_cut_iL_max)); 01355 /* the initial step sets a timing mark. If there is no valid r_cut, we can only try 01356 to increase cao to increase the obtainable precision of the far formula. */ 01357 do { 01358 tmp_time = p3m_mc_time(log, mesh, cao, r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy); 01359 /* bail out if the force evaluation is not working */ 01360 if (tmp_time == -1) return -1; 01361 /* cao is too large for this grid, but still the accuracy cannot be achieved, give up */ 01362 if (tmp_time == -3) { 01363 P3M_TRACE(fprintf(stderr, "p3m_m_time: no possible cao found\n")); 01364 return -2; 01365 } 01366 /* we have a valid time, start optimising from there */ 01367 if (tmp_time >= 0) { 01368 best_time = tmp_time; 01369 *_r_cut_iL = tmp_r_cut_iL; 01370 *_alpha_L = tmp_alpha_L; 01371 *_accuracy = tmp_accuracy; 01372 *_cao = cao; 01373 break; 01374 } 01375 /* the required accuracy could not be obtained, try higher caos. Therefore optimisation can only be 01376 obtained with even higher caos, but not lower ones */ 01377 P3M_TRACE(fprintf(stderr, "p3m_m_time: doesn't give precision, step up\n")); 01378 cao++; 01379 final_dir = 1; 01380 } 01381 while (cao <= cao_max); 01382 /* with this mesh, the required accuracy cannot be obtained. */ 01383 if (cao > cao_max) return -2; 01384 01385 /* at the boundaries, only the opposite direction can be used for optimisation */ 01386 if (cao == cao_min) final_dir = 1; 01387 else if (cao == cao_max) final_dir = -1; 01388 01389 P3M_TRACE(fprintf(stderr, "p3m_m_time: final constraints dir %d\n", final_dir)); 01390 01391 if (final_dir == 0) { 01392 /* check in which direction we can optimise. Both directions are possible */ 01393 double dir_times[3]; 01394 for (final_dir = -1; final_dir <= 1; final_dir += 2) { 01395 dir_times[final_dir + 1] = tmp_time = 01396 p3m_mc_time(log, mesh, cao + final_dir, r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy); 01397 /* bail out on errors, as usual */ 01398 if (tmp_time == -1) return -P3M_TUNE_FAIL; 01399 /* in this direction, we cannot optimise, since we get into precision trouble */ 01400 if (tmp_time < 0) continue; 01401 01402 if (tmp_time < best_time) { 01403 best_time = tmp_time; 01404 *_r_cut_iL = tmp_r_cut_iL; 01405 *_alpha_L = tmp_alpha_L; 01406 *_accuracy = tmp_accuracy; 01407 *_cao = cao + final_dir; 01408 } 01409 } 01410 /* choose the direction which was optimal, if any of the two */ 01411 if (dir_times[0] == best_time) { final_dir = -1; } 01412 else if (dir_times[2] == best_time) { final_dir = 1; } 01413 else { 01414 /* no improvement in either direction, however if one is only marginally worse, we can still try*/ 01415 /* down is possible and not much worse, while up is either illegal or even worse */ 01416 if ((dir_times[0] >= 0 && dir_times[0] < best_time + P3M_TIME_GRAN) && 01417 (dir_times[2] < 0 || dir_times[2] > dir_times[0])) 01418 final_dir = -1; 01419 /* same for up */ 01420 else if ((dir_times[2] >= 0 && dir_times[2] < best_time + P3M_TIME_GRAN) && 01421 (dir_times[0] < 0 || dir_times[0] > dir_times[2])) 01422 final_dir = 1; 01423 else { 01424 /* really no chance for optimisation */ 01425 P3M_TRACE(fprintf(stderr, "p3m_m_time: mesh=(%d, %d, %d) final cao=%d time=%f\n",mesh[0],mesh[1],mesh[2], cao, best_time)); 01426 return best_time; 01427 } 01428 } 01429 /* we already checked the initial cao and its neighbor */ 01430 cao += 2*final_dir; 01431 } 01432 else { 01433 /* here some constraint is active, and we only checked the initial cao itself */ 01434 cao += final_dir; 01435 } 01436 01437 P3M_TRACE(fprintf(stderr, "p3m_m_time: optimise in direction %d\n", final_dir)); 01438 01439 /* move cao into the optimisation direction until we do not gain anymore. */ 01440 for (; cao >= cao_min && cao <= cao_max; cao += final_dir) { 01441 tmp_time = p3m_mc_time(log, mesh, cao, r_cut_iL_min, r_cut_iL_max, 01442 &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy); 01443 /* bail out on errors, as usual */ 01444 if (tmp_time == -1) return -1; 01445 /* if we cannot meet the precision anymore, give up */ 01446 if (tmp_time < 0) break; 01447 01448 if (tmp_time < best_time) { 01449 best_time = tmp_time; 01450 *_r_cut_iL = tmp_r_cut_iL; 01451 *_alpha_L = tmp_alpha_L; 01452 *_accuracy = tmp_accuracy; 01453 *_cao = cao; 01454 } 01455 /* no hope of further optimisation */ 01456 else if (tmp_time > best_time + P3M_TIME_GRAN) 01457 break; 01458 } 01459 P3M_TRACE(fprintf(stderr, "p3m_m_time: mesh=(%d, %d, %d) final cao=%d r_cut=%f time=%f\n",mesh[0],mesh[1],mesh[2], *_cao, *_r_cut_iL, best_time)); 01460 return best_time; 01461 } 01462 01463 int p3m_adaptive_tune(char **log) { 01464 int mesh[3] = {0, 0, 0}; 01465 int tmp_mesh[3]; 01466 double r_cut_iL_min, r_cut_iL_max, r_cut_iL = -1, tmp_r_cut_iL=0.0; 01467 int cao_min, cao_max, cao = -1, tmp_cao; 01468 double alpha_L = -1, tmp_alpha_L=0.0; 01469 double accuracy = -1, tmp_accuracy=0.0; 01470 double time_best=1e20, tmp_time; 01471 double mesh_density = 0.0, mesh_density_min, mesh_density_max; 01472 char b[3*ES_INTEGER_SPACE + 3*ES_DOUBLE_SPACE + 128]; 01473 01474 if (skin == -1) { 01475 *log = strcat_alloc(*log, "p3m cannot be tuned, since the skin is not yet set"); 01476 return ES_ERROR; 01477 } 01478 01479 if (p3m.params.epsilon != P3M_EPSILON_METALLIC) { 01480 if( !((box_l[0] == box_l[1]) && 01481 (box_l[1] == box_l[2]))) { 01482 *log = strcat_alloc(*log, "{049 P3M_init: Nonmetallic epsilon requires cubic box} "); 01483 return ES_ERROR; 01484 } 01485 } 01486 01487 /* preparation */ 01488 mpi_bcast_event(P3M_COUNT_CHARGES); 01489 /* Print Status */ 01490 sprintf(b, "P3M tune parameters: Accuracy goal = %.5e\n", p3m.params.accuracy); 01491 *log = strcat_alloc(*log, b); 01492 sprintf(b, "System: box_l = %.5e # charged part = %d Sum[q_i^2] = %.5e\n", 01493 box_l[0], p3m.sum_qpart, p3m.sum_q2); 01494 *log = strcat_alloc(*log, b); 01495 01496 if (p3m.sum_qpart == 0) { 01497 *log = strcat_alloc(*log, "no charged particles in the system, cannot tune P3M"); 01498 return ES_ERROR; 01499 } 01500 01501 /* parameter ranges */ 01502 /* if at least the number of meshpoints in one direction is not set, we have to tune it. */ 01503 if (p3m.params.mesh[0] == 0 || p3m.params.mesh[1] == 0 || p3m.params.mesh[2] == 0) { 01504 mesh_density_min = pow(p3m.sum_qpart / (box_l[0] * box_l[1] * box_l[2]) , 1.0/3.0); 01505 mesh_density_max = 256 / pow(box_l[0] * box_l[1] * box_l[2], 1.0/3.0); 01506 /* this limits the tried meshes if the accuracy cannot 01507 be obtained with smaller meshes, but normally not all these 01508 meshes have to be tested */ 01509 /* avoid using more than 1 GB of FFT arrays (per default, see config.h) */ 01510 01511 P3M_TRACE(fprintf(stderr, "%d: starting with meshdensity %lf, using at most %lf.\n", this_node, mesh_density_min, mesh_density_max)); 01512 01513 } else { 01514 mesh_density = mesh_density_min = mesh_density_max = p3m.params.mesh[0] / box_l[0]; 01515 01516 sprintf(b, "fixed mesh %d %d %d\n", p3m.params.mesh[0], p3m.params.mesh[1], p3m.params.mesh[2]); 01517 *log = strcat_alloc(*log, b); 01518 } 01519 01520 if(p3m.params.r_cut_iL == 0.0) { 01521 r_cut_iL_min = 0; 01522 r_cut_iL_max = dmin(min_local_box_l, min_box_l/2.0) - skin; 01523 r_cut_iL_min *= box_l_i[0]; 01524 r_cut_iL_max *= box_l_i[0]; 01525 } 01526 else { 01527 r_cut_iL_min = r_cut_iL_max = p3m.params.r_cut_iL; 01528 01529 sprintf(b, "fixed r_cut_iL %f\n", p3m.params.r_cut_iL); 01530 *log = strcat_alloc(*log, b); 01531 } 01532 01533 if(p3m.params.cao == 0) { 01534 cao_min = 1; 01535 cao_max = 7; 01536 cao = 3; 01537 } 01538 else { 01539 cao_min = cao_max = cao = p3m.params.cao; 01540 01541 sprintf(b, "fixed cao %d\n", p3m.params.cao); 01542 *log = strcat_alloc(*log, b); 01543 } 01544 01545 *log = strcat_alloc(*log, "mesh cao r_cut_iL alpha_L err rs_err ks_err time [ms]\n"); 01546 01547 /* mesh loop */ 01548 /* we're tuning the density of mesh points, which is the same in every direction. */ 01549 for (mesh_density=mesh_density_min;mesh_density<=mesh_density_max;mesh_density+=0.1) { 01550 tmp_cao = cao; 01551 01552 P3M_TRACE(fprintf(stderr, "%d: trying meshdensity %lf.\n", this_node, mesh_density)); 01553 01554 tmp_mesh[0] = (int)(box_l[0]*mesh_density); 01555 tmp_mesh[1] = (int)(box_l[1]*mesh_density); 01556 tmp_mesh[2] = (int)(box_l[2]*mesh_density); 01557 01558 if(tmp_mesh[0] % 2) 01559 tmp_mesh[0]++; 01560 if(tmp_mesh[1] % 2) 01561 tmp_mesh[1]++; 01562 if(tmp_mesh[2] % 2) 01563 tmp_mesh[2]++; 01564 01565 tmp_time = p3m_m_time(log, tmp_mesh, 01566 cao_min, cao_max, &tmp_cao, 01567 r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, 01568 &tmp_alpha_L, &tmp_accuracy); 01569 /* some error occured during the tuning force evaluation */ 01570 P3M_TRACE(fprintf(stderr,"delta_acceracy: %lf tune time: %lf\n", p3m.params.accuracy - tmp_accuracy,tmp_time)); 01571 // if (tmp_time == -1) con; 01572 /* this mesh does not work at all */ 01573 if (tmp_time < 0.0) continue; 01574 01575 /* the optimum r_cut for this mesh is the upper limit for higher meshes, 01576 everything else is slower */ 01577 r_cut_iL_max = tmp_r_cut_iL; 01578 01579 /* new optimum */ 01580 if (tmp_time < time_best) { 01581 P3M_TRACE(fprintf(stderr, "Found new optimum: time %lf, mesh (%d %d %d)\n", tmp_time, tmp_mesh[0], tmp_mesh[1], tmp_mesh[2])); 01582 time_best = tmp_time; 01583 mesh[0] = tmp_mesh[0]; 01584 mesh[1] = tmp_mesh[1]; 01585 mesh[2] = tmp_mesh[2]; 01586 cao = tmp_cao; 01587 r_cut_iL = tmp_r_cut_iL; 01588 alpha_L = tmp_alpha_L; 01589 accuracy = tmp_accuracy; 01590 } 01591 /* no hope of further optimisation */ 01592 else if (tmp_time > time_best + P3M_TIME_GRAN) { 01593 P3M_TRACE(fprintf(stderr, "%d: %lf is mush slower then best time, aborting.\n", this_node, tmp_time)); 01594 break; 01595 } 01596 } 01597 01598 P3M_TRACE(fprintf(stderr,"%d: finished tuning, best time: %lf\n", this_node,time_best)); 01599 if(time_best == 1e20) { 01600 *log = strcat_alloc(*log, "failed to tune P3M parameters to required accuracy\n"); 01601 return ES_ERROR; 01602 } 01603 01604 /* set tuned p3m parameters */ 01605 p3m.params.r_cut_iL = r_cut_iL; 01606 p3m.params.mesh[0] = mesh[0]; 01607 p3m.params.mesh[1] = mesh[1]; 01608 p3m.params.mesh[2] = mesh[2]; 01609 p3m.params.cao = cao; 01610 p3m.params.alpha_L = alpha_L; 01611 p3m.params.accuracy = accuracy; 01612 p3m_scaleby_box_l(); 01613 /* broadcast tuned p3m parameters */ 01614 P3M_TRACE(fprintf(stderr,"%d: Broadcasting P3M parameters: mesh: (%d %d %d), cao: %d, alpha_L: %lf, acccuracy: %lf\n", this_node, p3m.params.mesh[0], p3m.params.mesh[1], p3m.params.mesh[2], p3m.params.cao, p3m.params.alpha_L, p3m.params.accuracy)); 01615 mpi_bcast_coulomb_params(); 01616 01617 P3M_TRACE(p3m_print()); 01618 01619 /* Tell the user about the outcome */ 01620 sprintf(b, "\nresulting parameters:\n%-4d %-3d %.5e %.5e %.5e %-8d\n", 01621 mesh[0], cao, r_cut_iL, alpha_L, accuracy, (int)time_best); 01622 *log = strcat_alloc(*log, b); 01623 return ES_OK; 01624 } 01625 01626 void p3m_count_charged_particles() 01627 { 01628 Cell *cell; 01629 Particle *part; 01630 int i,c,np; 01631 double node_sums[3], tot_sums[3]; 01632 01633 P3M_TRACE(fprintf(stderr,"%d: p3m_count_charged_particles\n",this_node)); 01634 01635 for(i=0;i<3;i++) 01636 { node_sums[i]=0.0; tot_sums[i]=0.0;} 01637 01638 for (c = 0; c < local_cells.n; c++) { 01639 cell = local_cells.cell[c]; 01640 part = cell->part; 01641 np = cell->n; 01642 for(i=0;i<np;i++) { 01643 if( part[i].p.q != 0.0 ) { 01644 node_sums[0] += 1.0; 01645 node_sums[1] += SQR(part[i].p.q); 01646 node_sums[2] += part[i].p.q; 01647 } 01648 } 01649 } 01650 01651 MPI_Allreduce(node_sums, tot_sums, 3, MPI_DOUBLE, MPI_SUM, comm_cart); 01652 p3m.sum_qpart = (int)(tot_sums[0]+0.1); 01653 p3m.sum_q2 = tot_sums[1]; 01654 p3m.square_sum_q = SQR(tot_sums[2]); 01655 01656 P3M_TRACE(fprintf(stderr, "%d: p3m.sum_qpart: %d, p3m.sum_q2: %lf, total_charge %lf\n", this_node, p3m.sum_qpart, p3m.sum_q2, sqrt(p3m.square_sum_q))); 01657 } 01658 01659 01660 double p3m_real_space_error(double prefac, double r_cut_iL, 01661 int n_c_part, double sum_q2, double alpha_L) 01662 { 01663 return (2.0*prefac*sum_q2*exp(-SQR(r_cut_iL*alpha_L))) / (sqrt((double)n_c_part*r_cut_iL)*box_l[1]*box_l[2]); 01664 } 01665 01666 double p3m_k_space_error(double prefac, int mesh[3], int cao, int n_c_part, double sum_q2, double alpha_L) 01667 { 01668 int nx, ny, nz; 01669 double he_q = 0.0, mesh_i[3] = {1.0/mesh[0], 1.0/mesh[1], 1.0/mesh[2]}, alpha_L_i = 1./alpha_L; 01670 double alias1, alias2, n2, cs; 01671 double ctan_x, ctan_y; 01672 01673 for (nx=-mesh[0]/2; nx<mesh[0]/2; nx++) { 01674 ctan_x = p3m_analytic_cotangent_sum(nx,mesh_i[0],cao); 01675 for (ny=-mesh[1]/2; ny<mesh[1]/2; ny++) { 01676 ctan_y = ctan_x * p3m_analytic_cotangent_sum(ny,mesh_i[1],cao); 01677 for (nz=-mesh[2]/2; nz<mesh[2]/2; nz++) { 01678 if((nx!=0) || (ny!=0) || (nz!=0)) { 01679 n2 = SQR(nx) + SQR(ny) + SQR(nz); 01680 cs = p3m_analytic_cotangent_sum(nz,mesh_i[2],cao)*ctan_y; 01681 p3m_tune_aliasing_sums(nx,ny,nz,mesh,mesh_i,cao,alpha_L_i,&alias1,&alias2); 01682 01683 double d = alias1 - SQR(alias2/cs) / n2; 01684 /* at high precisions, d can become negative due to extinction; 01685 also, don't take values that have no significant digits left*/ 01686 if (d > 0 && (fabs(d/alias1) > ROUND_ERROR_PREC)) 01687 he_q += d; 01688 } 01689 } 01690 } 01691 } 01692 return 2.0*prefac*sum_q2*sqrt(he_q/(double)n_c_part) / (box_l[1]*box_l[2]); 01693 } 01694 01695 01696 void p3m_tune_aliasing_sums(int nx, int ny, int nz, 01697 int mesh[3], double mesh_i[3], int cao, double alpha_L_i, 01698 double *alias1, double *alias2) 01699 { 01700 01701 int mx,my,mz; 01702 double nmx,nmy,nmz; 01703 double fnmx,fnmy,fnmz; 01704 01705 double ex,ex2,nm2,U2,factor1; 01706 01707 factor1 = SQR(PI*alpha_L_i); 01708 01709 *alias1 = *alias2 = 0.0; 01710 for (mx=-P3M_BRILLOUIN; mx<=P3M_BRILLOUIN; mx++) { 01711 fnmx = mesh_i[0] * (nmx = nx + mx*mesh[0]); 01712 for (my=-P3M_BRILLOUIN; my<=P3M_BRILLOUIN; my++) { 01713 fnmy = mesh_i[1] * (nmy = ny + my*mesh[1]); 01714 for (mz=-P3M_BRILLOUIN; mz<=P3M_BRILLOUIN; mz++) { 01715 fnmz = mesh_i[2] * (nmz = nz + mz*mesh[2]); 01716 01717 nm2 = SQR(nmx) + SQR(nmy) + SQR(nmz); 01718 ex2 = SQR( ex = exp(-factor1*nm2) ); 01719 01720 U2 = pow(sinc(fnmx)*sinc(fnmy)*sinc(fnmz), 2.0*cao); 01721 01722 *alias1 += ex2 / nm2; 01723 *alias2 += U2 * ex * (nx*nmx + ny*nmy + nz*nmz) / nm2; 01724 } 01725 } 01726 } 01727 } 01728 01729 01730 01731 01732 /************************************************************/ 01733 01734 void p3m_calc_local_ca_mesh() { 01735 int i; 01736 int ind[3]; 01737 /* total skin size */ 01738 double full_skin[3]; 01739 01740 for(i=0;i<3;i++) 01741 full_skin[i]= p3m.params.cao_cut[i]+skin+p3m.params.additional_mesh[i]; 01742 01743 /* inner left down grid point (global index) */ 01744 for(i=0;i<3;i++) p3m.local_mesh.in_ld[i] = (int)ceil(my_left[i]*p3m.params.ai[i]-p3m.params.mesh_off[i]); 01745 /* inner up right grid point (global index) */ 01746 for(i=0;i<3;i++) p3m.local_mesh.in_ur[i] = (int)floor(my_right[i]*p3m.params.ai[i]-p3m.params.mesh_off[i]); 01747 01748 /* correct roundof errors at boundary */ 01749 for(i=0;i<3;i++) { 01750 if((my_right[i]*p3m.params.ai[i]-p3m.params.mesh_off[i])-p3m.local_mesh.in_ur[i]<ROUND_ERROR_PREC) p3m.local_mesh.in_ur[i]--; 01751 if(1.0+(my_left[i]*p3m.params.ai[i]-p3m.params.mesh_off[i])-p3m.local_mesh.in_ld[i]<ROUND_ERROR_PREC) p3m.local_mesh.in_ld[i]--; 01752 } 01753 /* inner grid dimensions */ 01754 for(i=0;i<3;i++) p3m.local_mesh.inner[i] = p3m.local_mesh.in_ur[i] - p3m.local_mesh.in_ld[i] + 1; 01755 /* index of left down grid point in global mesh */ 01756 for(i=0;i<3;i++) 01757 p3m.local_mesh.ld_ind[i]=(int)ceil((my_left[i]-full_skin[i])*p3m.params.ai[i]-p3m.params.mesh_off[i]); 01758 /* left down margin */ 01759 for(i=0;i<3;i++) p3m.local_mesh.margin[i*2] = p3m.local_mesh.in_ld[i]-p3m.local_mesh.ld_ind[i]; 01760 /* up right grid point */ 01761 for(i=0;i<3;i++) ind[i]=(int)floor((my_right[i]+full_skin[i])*p3m.params.ai[i]-p3m.params.mesh_off[i]); 01762 /* correct roundof errors at up right boundary */ 01763 for(i=0;i<3;i++) 01764 if(((my_right[i]+full_skin[i])*p3m.params.ai[i]-p3m.params.mesh_off[i])-ind[i]==0) ind[i]--; 01765 /* up right margin */ 01766 for(i=0;i<3;i++) p3m.local_mesh.margin[(i*2)+1] = ind[i] - p3m.local_mesh.in_ur[i]; 01767 01768 /* grid dimension */ 01769 p3m.local_mesh.size=1; 01770 for(i=0;i<3;i++) {p3m.local_mesh.dim[i] = ind[i] - p3m.local_mesh.ld_ind[i] + 1; p3m.local_mesh.size*=p3m.local_mesh.dim[i];} 01771 /* reduce inner grid indices from global to local */ 01772 for(i=0;i<3;i++) p3m.local_mesh.in_ld[i] = p3m.local_mesh.margin[i*2]; 01773 for(i=0;i<3;i++) p3m.local_mesh.in_ur[i] = p3m.local_mesh.margin[i*2]+p3m.local_mesh.inner[i]; 01774 01775 p3m.local_mesh.q_2_off = p3m.local_mesh.dim[2] - p3m.params.cao; 01776 p3m.local_mesh.q_21_off = p3m.local_mesh.dim[2] * (p3m.local_mesh.dim[1] - p3m.params.cao); 01777 01778 } 01779 01780 void p3m_calc_lm_ld_pos() { 01781 int i; 01782 /* spacial position of left down mesh point */ 01783 for(i=0;i<3;i++) { 01784 p3m.local_mesh.ld_pos[i] = (p3m.local_mesh.ld_ind[i]+ p3m.params.mesh_off[i])*p3m.params.a[i]; 01785 } 01786 } 01787 01788 01789 void p3m_init_a_ai_cao_cut() { 01790 int i; 01791 for(i=0;i<3;i++) { 01792 p3m.params.ai[i] = (double)p3m.params.mesh[i]/box_l[i]; 01793 p3m.params.a[i] = 1.0/p3m.params.ai[i]; 01794 p3m.params.cao_cut[i] = 0.5*p3m.params.a[i]*p3m.params.cao; 01795 } 01796 } 01797 01798 01799 01800 01801 int p3m_sanity_checks_boxl() { 01802 char *errtxt; 01803 int i, ret = 0; 01804 for(i=0;i<3;i++) { 01805 /* check k-space cutoff */ 01806 if(p3m.params.cao_cut[i] >= 0.5*box_l[i]) { 01807 errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE); 01808 ERROR_SPRINTF(errtxt,"{039 P3M_init: k-space cutoff %g is larger than half of box dimension %g} ",p3m.params.cao_cut[i],box_l[i]); 01809 ret = 1; 01810 } 01811 if(p3m.params.cao_cut[i] >= local_box_l[i]) { 01812 errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE); 01813 ERROR_SPRINTF(errtxt,"{040 P3M_init: k-space cutoff %g is larger than local box dimension %g} ",p3m.params.cao_cut[i],local_box_l[i]); 01814 ret = 1; 01815 } 01816 } 01817 01818 01819 return ret; 01820 } 01821 01822 01823 01824 01825 int p3m_sanity_checks() 01826 { 01827 char *errtxt; 01828 int ret = 0; 01829 01830 if (!PERIODIC(0) || !PERIODIC(1) || !PERIODIC(2)) { 01831 errtxt = runtime_error(128); 01832 ERROR_SPRINTF(errtxt, "{041 P3M requires periodicity 1 1 1} "); 01833 ret = 1; 01834 } 01835 01836 if (cell_structure.type != CELL_STRUCTURE_DOMDEC) { 01837 errtxt = runtime_error(128); 01838 ERROR_SPRINTF(errtxt, "{042 P3M at present requires the domain decomposition cell system} "); 01839 ret = 1; 01840 } 01841 01842 if (p3m_sanity_checks_boxl()) ret = 1; 01843 01844 if( p3m.params.mesh[0] == 0) { 01845 errtxt = runtime_error(128); 01846 ERROR_SPRINTF(errtxt,"{045 P3M_init: mesh size is not yet set} "); 01847 ret = 1; 01848 } 01849 if( p3m.params.cao == 0) { 01850 errtxt = runtime_error(128); 01851 ERROR_SPRINTF(errtxt,"{046 P3M_init: cao is not yet set} "); 01852 ret = 1; 01853 } 01854 if (skin == -1) { 01855 errtxt = runtime_error(128); 01856 ERROR_SPRINTF(errtxt,"{047 P3M_init: skin is not yet set} "); 01857 ret = 1; 01858 } 01859 if (p3m.params.alpha < 0.0 ) { 01860 errtxt = runtime_error(128); 01861 ERROR_SPRINTF(errtxt,"{048 P3M_init: alpha must be >0} "); 01862 ret = 1; 01863 } 01864 if(node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) { 01865 errtxt = runtime_error(128); 01866 ERROR_SPRINTF(errtxt,"{048a P3M_init: node grid must be sorted, largest first} "); 01867 ret = 1; 01868 } 01869 01870 if (p3m.params.epsilon != P3M_EPSILON_METALLIC) { 01871 if( !((p3m.params.mesh[0] == p3m.params.mesh[1]) && 01872 (p3m.params.mesh[1] == p3m.params.mesh[2]))) { 01873 errtxt = runtime_error(128); 01874 ERROR_SPRINTF(errtxt,"{049 P3M_init: Nonmetallic epsilon requires cubic box} "); 01875 ret = 1; 01876 } 01877 } 01878 01879 01880 return ret; 01881 } 01882 01883 01884 01885 01886 void p3m_calc_send_mesh() 01887 { 01888 int i,j,evenodd; 01889 int done[3]={0,0,0}; 01890 MPI_Status status; 01891 /* send grids */ 01892 for(i=0;i<3;i++) { 01893 for(j=0;j<3;j++) { 01894 /* left */ 01895 p3m.sm.s_ld[i*2][j] = 0 + done[j]*p3m.local_mesh.margin[j*2]; 01896 if(j==i) p3m.sm.s_ur[i*2][j] = p3m.local_mesh.margin[j*2]; 01897 else p3m.sm.s_ur[i*2][j] = p3m.local_mesh.dim[j]-done[j]*p3m.local_mesh.margin[(j*2)+1]; 01898 /* right */ 01899 if(j==i) p3m.sm.s_ld[(i*2)+1][j] = p3m.local_mesh.in_ur[j]; 01900 else p3m.sm.s_ld[(i*2)+1][j] = 0 + done[j]*p3m.local_mesh.margin[j*2]; 01901 p3m.sm.s_ur[(i*2)+1][j] = p3m.local_mesh.dim[j] - done[j]*p3m.local_mesh.margin[(j*2)+1]; 01902 } 01903 done[i]=1; 01904 } 01905 p3m.sm.max=0; 01906 for(i=0;i<6;i++) { 01907 p3m.sm.s_size[i] = 1; 01908 for(j=0;j<3;j++) { 01909 p3m.sm.s_dim[i][j] = p3m.sm.s_ur[i][j]-p3m.sm.s_ld[i][j]; 01910 p3m.sm.s_size[i] *= p3m.sm.s_dim[i][j]; 01911 } 01912 if(p3m.sm.s_size[i]>p3m.sm.max) p3m.sm.max=p3m.sm.s_size[i]; 01913 } 01914 /* communication */ 01915 for(i=0;i<6;i++) { 01916 if(i%2==0) j = i+1; 01917 else j = i-1; 01918 if(node_neighbors[i] != this_node) { 01919 /* two step communication: first all even positions than all odd */ 01920 for(evenodd=0; evenodd<2;evenodd++) { 01921 if((node_pos[i/2]+evenodd)%2==0) 01922 MPI_Send(&(p3m.local_mesh.margin[i]), 1, MPI_INT, 01923 node_neighbors[i],REQ_P3M_INIT,comm_cart); 01924 else 01925 MPI_Recv(&(p3m.local_mesh.r_margin[j]), 1, MPI_INT, 01926 node_neighbors[j],REQ_P3M_INIT,comm_cart,&status); 01927 } 01928 } 01929 else { 01930 p3m.local_mesh.r_margin[j] = p3m.local_mesh.margin[i]; 01931 } 01932 } 01933 /* recv grids */ 01934 for(i=0;i<3;i++) 01935 for(j=0;j<3;j++) { 01936 if(j==i) { 01937 p3m.sm.r_ld[ i*2 ][j] = p3m.sm.s_ld[ i*2 ][j] + p3m.local_mesh.margin[2*j]; 01938 p3m.sm.r_ur[ i*2 ][j] = p3m.sm.s_ur[ i*2 ][j] + p3m.local_mesh.r_margin[2*j]; 01939 p3m.sm.r_ld[(i*2)+1][j] = p3m.sm.s_ld[(i*2)+1][j] - p3m.local_mesh.r_margin[(2*j)+1]; 01940 p3m.sm.r_ur[(i*2)+1][j] = p3m.sm.s_ur[(i*2)+1][j] - p3m.local_mesh.margin[(2*j)+1]; 01941 } 01942 else { 01943 p3m.sm.r_ld[ i*2 ][j] = p3m.sm.s_ld[ i*2 ][j]; 01944 p3m.sm.r_ur[ i*2 ][j] = p3m.sm.s_ur[ i*2 ][j]; 01945 p3m.sm.r_ld[(i*2)+1][j] = p3m.sm.s_ld[(i*2)+1][j]; 01946 p3m.sm.r_ur[(i*2)+1][j] = p3m.sm.s_ur[(i*2)+1][j]; 01947 } 01948 } 01949 for(i=0;i<6;i++) { 01950 p3m.sm.r_size[i] = 1; 01951 for(j=0;j<3;j++) { 01952 p3m.sm.r_dim[i][j] = p3m.sm.r_ur[i][j]-p3m.sm.r_ld[i][j]; 01953 p3m.sm.r_size[i] *= p3m.sm.r_dim[i][j]; 01954 } 01955 if(p3m.sm.r_size[i]>p3m.sm.max) p3m.sm.max=p3m.sm.r_size[i]; 01956 } 01957 01958 01959 } 01960 01961 01962 01963 /************************************************/ 01964 01965 void p3m_scaleby_box_l() { 01966 if (coulomb.bjerrum == 0.0) { 01967 return; 01968 } 01969 01970 p3m.params.r_cut = p3m.params.r_cut_iL* box_l[0]; 01971 p3m.params.alpha = p3m.params.alpha_L * box_l_i[0]; 01972 p3m_init_a_ai_cao_cut(); 01973 p3m_calc_lm_ld_pos(); 01974 p3m_sanity_checks_boxl(); 01975 p3m_calc_influence_function_force(); 01976 p3m_calc_influence_function_energy(); 01977 } 01978 01979 /************************************************/ 01980 01981 void p3m_calc_kspace_stress (double* stress) { 01982 if (p3m.sum_q2 > 0) { 01983 double* node_k_space_stress; 01984 double* k_space_stress; 01985 double force_prefac, node_k_space_energy, sqk, vterm, kx, ky, kz; 01986 int jx, jy, jz, i, ind = 0; 01987 // ordering after fourier transform 01988 const int x = 2, y = 0, z = 1; 01989 node_k_space_stress = malloc(9*sizeof(double)); 01990 k_space_stress = malloc(9*sizeof(double)); 01991 01992 for (i = 0; i < 9; i++) { 01993 node_k_space_stress[i] = 0.0; 01994 k_space_stress[i] = 0.0; 01995 } 01996 01997 p3m_gather_fft_grid(p3m.rs_mesh); 01998 fft_perform_forw(p3m.rs_mesh); 01999 force_prefac = coulomb.prefactor / (2.0 * box_l[0] * box_l[1] * box_l[2]); 02000 02001 for(jx=0; jx < fft.plan[3].new_mesh[0]; jx++) { 02002 for(jy=0; jy < fft.plan[3].new_mesh[1]; jy++) { 02003 for(jz=0; jz < fft.plan[3].new_mesh[2]; jz++) { 02004 kx = p3m.d_op[2][ jx + fft.plan[3].start[0] ]; 02005 ky = p3m.d_op[0][ jy + fft.plan[3].start[1] ]; 02006 kz = p3m.d_op[1][ jz + fft.plan[3].start[2] ]; 02007 sqk = SQR(kx/box_l[x]) + SQR(ky/box_l[y]) + SQR(kz/box_l[z]); 02008 if (sqk == 0) { 02009 node_k_space_energy = 0.0; 02010 vterm = 0.0; 02011 } 02012 else { 02013 vterm = -2.0 * (1/sqk + SQR(PI/p3m.params.alpha)); 02014 node_k_space_energy = p3m.g_energy[ind] * ( SQR(p3m.rs_mesh[2*ind]) + SQR(p3m.rs_mesh[2*ind + 1]) ); 02015 } 02016 ind++; 02017 02018 node_k_space_stress[0] += node_k_space_energy * (1.0 + vterm*SQR(kx/box_l[x])); /* sigma_xx */ 02019 node_k_space_stress[1] += node_k_space_energy * (vterm*kx*ky/(box_l[x]*box_l[y])); /* sigma_xy */ 02020 node_k_space_stress[2] += node_k_space_energy * (vterm*kx*kz/(box_l[x]*box_l[z])); /* sigma_xz */ 02021 02022 node_k_space_stress[3] += node_k_space_energy * (vterm*kx*ky/(box_l[x]*box_l[y])); /* sigma_yx */ 02023 node_k_space_stress[4] += node_k_space_energy * (1.0 + vterm*SQR(ky/box_l[y])); /* sigma_yy */ 02024 node_k_space_stress[5] += node_k_space_energy * (vterm*ky*kz/(box_l[y]*box_l[z])); /* sigma_yz */ 02025 02026 node_k_space_stress[6] += node_k_space_energy * (vterm*kx*kz/(box_l[x]*box_l[z])); /* sigma_zx */ 02027 node_k_space_stress[7] += node_k_space_energy * (vterm*ky*kz/(box_l[y]*box_l[z])); /* sigma_zy */ 02028 node_k_space_stress[8] += node_k_space_energy * (1.0 + vterm*SQR(kz/box_l[z])); /* sigma_zz */ 02029 } 02030 } 02031 } 02032 MPI_Reduce(node_k_space_stress, k_space_stress, 9, MPI_DOUBLE, MPI_SUM, 0, comm_cart); 02033 for (i = 0; i < 9; i++) { 02034 stress[i] += k_space_stress[i] * force_prefac; 02035 } 02036 // fprintf(stderr, "sxx = %.5e, syy = %.5e, szz = %.5e\n", stress[0], stress[4], stress[8]); 02037 // fprintf(stderr, "sxy = %.5e, sxz = %.5e, syz = %.5e\n", stress[1], stress[2], stress[5]); 02038 free (node_k_space_stress); 02039 free (k_space_stress); 02040 } 02041 } 02042 02043 02044 /************************************************/ 02045 02046 02047 02048 02049 /*********************** miscelanea of functions *************************************/ 02050 02051 /************************************************ 02052 * Debug functions printing p3m structures 02053 ************************************************/ 02054 02055 void p3m_p3m_print_struct(p3m_parameter_struct ps) { 02056 fprintf(stderr,"%d: p3m_parameter_struct: \n",this_node); 02057 fprintf(stderr," alpha_L=%f, r_cut_iL=%f \n", 02058 ps.alpha_L,ps.r_cut_iL); 02059 fprintf(stderr," mesh=(%d,%d,%d), mesh_off=(%.4f,%.4f,%.4f)\n", 02060 ps.mesh[0],ps.mesh[1],ps.mesh[2], 02061 ps.mesh_off[0],ps.mesh_off[1],ps.mesh_off[2]); 02062 fprintf(stderr," cao=%d, inter=%d, epsilon=%f\n", 02063 ps.cao,ps.inter,ps.epsilon); 02064 fprintf(stderr," cao_cut=(%f,%f,%f)\n", 02065 ps.cao_cut[0],ps.cao_cut[1],ps.cao_cut[2]); 02066 fprintf(stderr," a=(%f,%f,%f), ai=(%f,%f,%f)\n", 02067 ps.a[0],ps.a[1],ps.a[2],ps.ai[0],ps.ai[1],ps.ai[2]); 02068 } 02069 02070 #endif /* of P3M */ 02071
1.7.5.1