ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
p3m.c
Go to the documentation of this file.
00001 /*
00002   Copyright (C) 2010,2011,2012,2013 The ESPResSo project
00003   Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
00004     Max-Planck-Institute for Polymer Research, Theory Group
00005   
00006   This file is part of ESPResSo.
00007   
00008   ESPResSo is free software: you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation, either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   ESPResSo is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
00020 */
00021 #include <mpi.h>
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 
00026 #include "utils.h"
00027 #include "integrate.h"
00028 #include "initialize.h"
00029 #include "global.h"
00030 #include "grid.h"
00031 #include "domain_decomposition.h"
00032 #include "particle_data.h"
00033 #include "communication.h"
00034 #include "fft.h"
00035 #include "p3m.h"
00036 #include "thermostat.h"
00037 #include "cells.h"
00038 #include "tuning.h"
00039 #include "elc.h"
00040 
00041 #ifdef P3M
00042 
00043 /************************************************
00044  * variables
00045  ************************************************/
00046 p3m_data_struct p3m;
00047 
00048 /* MPI tags for the charge-charge p3m communications: */
00049 /** Tag for communication in P3M_init() -> send_calc_mesh(). */
00050 #define REQ_P3M_INIT   200
00051 /** Tag for communication in p3m_gather_fft_grid(). */
00052 #define REQ_P3M_GATHER 201
00053 /** Tag for communication in p3m_spread_force_grid(). */
00054 #define REQ_P3M_SPREAD 202
00055 
00056 /* Index helpers for direct and reciprocal space
00057  * After the FFT the data is in order YZX, which
00058  * means that Y is the slowest changing index.
00059  * The defines are here to not get confused and
00060  * be able to easily change the order.
00061  */
00062 #define RX 0
00063 #define RY 1
00064 #define RZ 2
00065 #define KY 0
00066 #define KZ 1
00067 #define KX 2 
00068 
00069 /** \name Private Functions */
00070 /************************************************************/
00071 /*@{*/
00072 
00073 #ifdef P3M_DEBUG
00074 static void p3m_print(void) {
00075   fprintf(stderr, "general information: \n\t node: %d \n\t box_l: (%lf, %lf, %lf)\n", this_node, box_l[0], box_l[1], box_l[2]);
00076 
00077   fprintf(stderr, "p3m parameters:\n\t alpha_L: %lf\n\t r_cut_iL: %lf\n\t \
00078                    mesh: (%d, %d, %d)\n\t mesh_off: (%lf, %lf, %lf)\n\t \
00079                    cao: %d\n\t inter: %d\n\t accuracy: %lf\n\t epsilon: %lf\n\t \
00080                    cao_cut: (%lf, %lf, %lf)\n\t a: (%lf,%lf,%lf)\n\t \
00081                    ai: (%lf,%lf,%lf)\n\t alpha: %lf\n\t r_cut: %lf\n\t \
00082                    inter2: %d\n\t cao3: %d\n\t additional_mesh: (%lf,%lf,%lf)\n", \
00083           p3m.params.alpha_L,p3m.params.r_cut_iL, p3m.params.mesh[0], p3m.params.mesh[1], p3m.params.mesh[2], p3m.params.mesh_off[0], p3m.params.mesh_off[1], p3m.params.mesh_off[2], \
00084           p3m.params.cao, p3m.params.inter, p3m.params.accuracy, p3m.params.epsilon, p3m.params.cao_cut[0], p3m.params.cao_cut[1], p3m.params.cao_cut[2], p3m.params.a[0], p3m.params.a[1], p3m.params.a[2], p3m.params.ai[0], p3m.params.ai[1], p3m.params.ai[2], \
00085           p3m.params.alpha, p3m.params.r_cut, p3m.params.inter2, p3m.params.cao3, p3m.params.additional_mesh[0], p3m.params.additional_mesh[1], p3m.params.additional_mesh[2]);
00086 }
00087 
00088 #endif
00089 
00090 /** Calculates for charges the properties of the send/recv sub-meshes of the local FFT mesh. 
00091  *  In order to calculate the recv sub-meshes there is a communication of 
00092  *  the margins between neighbouring nodes. */ 
00093 static void p3m_calc_send_mesh();
00094 
00095 
00096 /** Initializes the (inverse) mesh constant \ref p3m_parameter_struct::a (\ref
00097     p3m_parameter_struct::ai) and the cutoff for charge assignment \ref
00098     p3m_parameter_struct::cao_cut, which has to be done by \ref p3m_init
00099     once and by \ref p3m_scaleby_box_l whenever the \ref box_l
00100     changed.  */
00101 static void p3m_init_a_ai_cao_cut(void);
00102 
00103 
00104 /** Calculate the spacial position of the left down mesh point of the local mesh, to be
00105     stored in \ref p3m_local_mesh::ld_pos; function called by \ref p3m_calc_local_ca_mesh once
00106     and by \ref p3m_scaleby_box_l whenever the \ref box_l changed. */
00107 static void p3m_calc_lm_ld_pos(void);
00108 
00109 
00110 /** Calculates the dipole term */
00111 static double p3m_calc_dipole_term(int force_flag, int energy_flag);
00112 
00113 /** Gather FFT grid.
00114  *  After the charge assignment Each node needs to gather the
00115  *  information for the FFT grid in his spatial domain.
00116  */
00117 static void p3m_gather_fft_grid(double* mesh);
00118 
00119 /** Spread force grid.
00120  *  After the k-space calculations each node needs to get all force
00121  *  information to reassigne the forces from the grid to the
00122  *  particles.
00123  */
00124 static void p3m_spread_force_grid(double* mesh);
00125 
00126 #ifdef P3M_STORE_CA_FRAC
00127 /** realloc charge assignment fields. */
00128 static void p3m_realloc_ca_fields(int newsize);
00129 #endif
00130 
00131 /** checks for correctness for charges in P3M of the cao_cut, necessary when the box length changes */
00132 static int p3m_sanity_checks_boxl(void);
00133 
00134 /** Calculate the spacial position of the left down mesh point of the local mesh, to be
00135     stored in \ref p3m_local_mesh::ld_pos; function called by \ref p3m_calc_local_ca_mesh once
00136     and by \ref p3m_scaleby_box_l whenever the \ref box_l changed. */
00137 static void p3m_calc_lm_ld_pos(void);
00138 
00139 /** Calculates properties of the local FFT mesh for the 
00140     charge assignment process. */
00141 static void p3m_calc_local_ca_mesh(void);
00142 
00143 
00144 /** Interpolates the P-th order charge assignment function from
00145  * Hockney/Eastwood 5-189 (or 8-61). The following charge fractions
00146  * are also tabulated in Deserno/Holm. */
00147 static void p3m_interpolate_charge_assignment_function(void);
00148 
00149 /** shifts the mesh points by mesh/2 */
00150 static void p3m_calc_meshift(void);
00151 
00152 /** Calculates the Fourier transformed differential operator.  
00153  *  Remark: This is done on the level of n-vectors and not k-vectors,
00154  *           i.e. the prefactor i*2*PI/L is missing! */
00155 static void p3m_calc_differential_operator(void);
00156 
00157 /** Calculates the optimal influence function of Hockney and Eastwood. 
00158  * (optimised for force calculations)
00159  *
00160  *  Each node calculates only the values for its domain in k-space
00161  *  (see fft.plan[3].mesh and fft.plan[3].start).
00162  *
00163  *  See also: Hockney/Eastwood 8-22 (p275). Note the somewhat
00164  *  different convention for the prefactors, which is described in
00165  *  Deserno/Holm. */
00166 static void p3m_calc_influence_function_force(void);
00167 
00168 /** Calculates the influence function optimized for the energy and the
00169     self energy correction.  */
00170 static void p3m_calc_influence_function_energy(void);
00171 
00172 
00173 /** Calculates the aliasing sums for the optimal influence function.
00174  *
00175  * Calculates the aliasing sums in the nominator and denominator of
00176  * the expression for the optimal influence function (see
00177  * Hockney/Eastwood: 8-22, p. 275).  
00178  *
00179  * \param  n           n-vector for which the aliasing sum is to be performed.
00180  * \param  nominator   aliasing sums in the nominator.
00181  * \return denominator aliasing sum in the denominator
00182  */
00183 double p3m_perform_aliasing_sums_force(int n[3], double nominator[3]);
00184 double p3m_perform_aliasing_sums_energy(int n[3]);
00185 
00186 /*@}*/
00187 
00188 
00189 /** \name P3M Tuning Functions (private)*/
00190 /************************************************************/
00191 /*@{*/
00192 
00193 /** Calculates the real space contribution to the rms error in the force (as described 
00194    by Kolafa and Perram). 
00195    \param prefac   Prefactor of coulomb interaction.
00196    \param r_cut_iL rescaled real space cutoff for p3m method.
00197    \param n_c_part number of charged particles in the system.
00198    \param sum_q2   sum of square of charges in the system
00199    \param alpha_L  rescaled ewald splitting parameter.
00200    \return real space error
00201 */
00202 static double p3m_real_space_error(double prefac, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L);
00203 
00204 /** Calculate the analytic expression of the error estimate for the
00205     P3M method in the book of Hockney and Eastwood (Eqn. 8.23) in
00206     order to obtain the rms error in the force for a system of N
00207     randomly distributed particles in a cubic box (k space part).
00208     \param prefac   Prefactor of coulomb interaction.
00209     \param mesh     number of mesh points in one direction.
00210     \param cao      charge assignment order.
00211     \param n_c_part number of charged particles in the system.
00212     \param sum_q2   sum of square of charges in the system
00213     \param alpha_L  rescaled ewald splitting parameter.
00214     \return reciprocal (k) space error
00215 */
00216 static double p3m_k_space_error(double prefac, int mesh[3], int cao, int n_c_part, double sum_q2, double alpha_L);
00217 
00218 
00219 
00220 /** aliasing sum used by \ref p3m_k_space_error. */
00221 static void p3m_tune_aliasing_sums(int nx, int ny, int nz, 
00222                             int mesh[3], double mesh_i[3], int cao, double alpha_L_i, 
00223                             double *alias1, double *alias2);
00224 
00225 /*@}*/
00226 
00227 
00228 void p3m_pre_init(void) {
00229   p3m_common_parameter_pre_init(&p3m.params);
00230   /* p3m.local_mesh is uninitialized */
00231   /* p3m.sm is uninitialized */
00232 
00233   p3m.rs_mesh = NULL;
00234   p3m.ks_mesh = NULL;
00235   p3m.sum_qpart = 0;
00236   p3m.sum_q2 = 0.0;
00237   p3m.square_sum_q = 0.0;
00238 
00239   for (int i = 0; i < 7; i++)
00240     p3m.int_caf[i] = NULL;
00241   p3m.pos_shift = 0.0;
00242   p3m.meshift_x = NULL;
00243   p3m.meshift_y = NULL;
00244   p3m.meshift_z = NULL;
00245 
00246   p3m.d_op[0] = NULL;
00247   p3m.d_op[1] = NULL;
00248   p3m.d_op[2] = NULL;
00249   p3m.g_force = NULL;
00250   p3m.g_energy = NULL;
00251 
00252 #ifdef P3M_STORE_CA_FRAC
00253   p3m.ca_num = 0;
00254   p3m.ca_frac = NULL;
00255   p3m.ca_fmp = NULL;
00256 #endif
00257   p3m.ks_pnum = 0;
00258 
00259   p3m.send_grid = NULL;
00260   p3m.recv_grid = NULL;
00261   
00262   fft_pre_init();
00263 }
00264 
00265 void p3m_free()
00266 {
00267   int i;
00268   /* free memory */
00269 #ifdef P3M_STORE_CA_FRAC
00270   free(p3m.ca_frac);
00271   free(p3m.ca_fmp);
00272 #endif
00273   free(p3m.send_grid);
00274   free(p3m.recv_grid);
00275   free(p3m.rs_mesh);
00276   free(p3m.ks_mesh); 
00277   for(i=0; i<p3m.params.cao; i++) free(p3m.int_caf[i]);
00278 }
00279 
00280 void p3m_set_bjerrum() {
00281   p3m.params.alpha    = 0.0;
00282   p3m.params.alpha_L  = 0.0;
00283   p3m.params.r_cut    = 0.0;
00284   p3m.params.r_cut_iL = 0.0;
00285   p3m.params.mesh[0]  = 0;
00286   p3m.params.mesh[1]  = 0;
00287   p3m.params.mesh[2]  = 0;
00288   p3m.params.cao      = 0;
00289 }
00290 
00291 void   p3m_init() {
00292   if(coulomb.bjerrum == 0.0) {       
00293     p3m.params.r_cut    = 0.0;
00294     p3m.params.r_cut_iL = 0.0;
00295 
00296 
00297 
00298     if(this_node==0) 
00299       P3M_TRACE(fprintf(stderr,"0: P3M_init: Bjerrum length is zero.\n");
00300 
00301       fprintf(stderr,"   Electrostatics switched off!\n"));
00302   } else {  
00303     P3M_TRACE(fprintf(stderr,"%d: p3m_init: \n",this_node));
00304 
00305     if (p3m_sanity_checks()) return;
00306 
00307     P3M_TRACE(fprintf(stderr,"%d: p3m_init: starting\n",this_node));
00308 
00309     P3M_TRACE(fprintf(stderr,"%d: mesh=%d, cao=%d, mesh_off=(%f,%f,%f)\n",this_node,p3m.params.mesh[0],p3m.params.cao,p3m.params.mesh_off[0],p3m.params.mesh_off[1],p3m.params.mesh_off[2]));
00310     p3m.params.cao3 = p3m.params.cao*p3m.params.cao*p3m.params.cao;
00311 
00312     /* initializes the (inverse) mesh constant p3m.params.a (p3m.params.ai) and the cutoff for charge assignment p3m.params.cao_cut */
00313     p3m_init_a_ai_cao_cut();
00314 
00315 #ifdef P3M_STORE_CA_FRAC
00316     /* initialize ca fields to size CA_INCREMENT: p3m.ca_frac and p3m.ca_fmp */
00317     p3m.ca_num = 0;
00318     p3m_realloc_ca_fields(CA_INCREMENT);
00319 #endif
00320  
00321     p3m_calc_local_ca_mesh();
00322 
00323     p3m_calc_send_mesh();
00324     P3M_TRACE(p3m_p3m_print_local_mesh(p3m.local_mesh));
00325     P3M_TRACE(p3m_p3m_print_send_mesh(p3m.sm));
00326     p3m.send_grid = (double *) realloc(p3m.send_grid, sizeof(double)*p3m.sm.max);
00327     p3m.recv_grid = (double *) realloc(p3m.recv_grid, sizeof(double)*p3m.sm.max);
00328 
00329     /* fix box length dependent constants */
00330     p3m_scaleby_box_l();
00331 
00332     if (p3m.params.inter > 0)
00333       p3m_interpolate_charge_assignment_function();
00334   
00335     /* position offset for calc. of first meshpoint */
00336     p3m.pos_shift = (double)((p3m.params.cao-1)/2) - (p3m.params.cao%2)/2.0;
00337     P3M_TRACE(fprintf(stderr,"%d: p3m.pos_shift = %f\n",this_node,p3m.pos_shift)); 
00338  
00339     /* FFT */
00340     P3M_TRACE(fprintf(stderr,"%d: p3m.rs_mesh ADR=%p\n",this_node,p3m.rs_mesh));
00341  
00342     int ca_mesh_size = fft_init(&p3m.rs_mesh,
00343                                 p3m.local_mesh.dim,p3m.local_mesh.margin,
00344                                 p3m.params.mesh, p3m.params.mesh_off,
00345                                 &p3m.ks_pnum);
00346     p3m.ks_mesh = (double *) realloc(p3m.ks_mesh, ca_mesh_size*sizeof(double));
00347     
00348 
00349     P3M_TRACE(fprintf(stderr,"%d: p3m.rs_mesh ADR=%p\n",this_node,p3m.rs_mesh));
00350   
00351     /* k-space part: */
00352     p3m_calc_differential_operator();
00353     p3m_calc_influence_function_force();
00354     p3m_calc_influence_function_energy();
00355 
00356     p3m_count_charged_particles();
00357 
00358     P3M_TRACE(fprintf(stderr,"%d: p3m-charges  initialized\n",this_node));
00359   }
00360 }
00361 
00362 void p3m_set_tune_params(double r_cut, int mesh, int cao,
00363                          double alpha, double accuracy, int n_interpol)
00364 {
00365   if (r_cut >= 0) {
00366     p3m.params.r_cut    = r_cut;
00367     p3m.params.r_cut_iL = r_cut*box_l_i[0];
00368   }
00369 
00370   if (mesh >= 0)
00371     p3m.params.mesh[2] = p3m.params.mesh[1] = p3m.params.mesh[0] = mesh;
00372 
00373   if (cao >= 0)
00374     p3m.params.cao = cao;
00375 
00376   if (alpha >= 0) {
00377     p3m.params.alpha   = alpha;
00378     p3m.params.alpha_L = alpha*box_l[0];
00379   }
00380 
00381   if (accuracy >= 0)
00382     p3m.params.accuracy = accuracy;
00383 
00384   if (n_interpol != -1)
00385     p3m.params.inter = n_interpol;
00386 
00387   coulomb.prefactor = (temperature > 0) ? temperature*coulomb.bjerrum : coulomb.bjerrum;
00388 }
00389 
00390 /*@}*/
00391 
00392 int p3m_set_params(double r_cut, int *mesh, int cao,
00393                    double alpha, double accuracy)
00394 {
00395   if (coulomb.method != COULOMB_P3M && coulomb.method != COULOMB_ELC_P3M)
00396     coulomb.method = COULOMB_P3M;
00397     
00398   if(r_cut < 0)
00399     return -1;
00400 
00401   if((mesh[0] < 0) || (mesh[1] < 0) || (mesh[2] < 0))
00402     return -2;
00403 
00404   if(cao < 1 || cao > 7 || cao > mesh[0] || cao > mesh[1] || cao > mesh[2] )
00405     return -3;
00406 
00407   p3m.params.r_cut    = r_cut;
00408   p3m.params.r_cut_iL = r_cut*box_l_i[0];
00409   p3m.params.mesh[2]  = mesh[2];
00410   p3m.params.mesh[1] = mesh[1];
00411   p3m.params.mesh[0] = mesh[0];
00412   p3m.params.cao      = cao;
00413 
00414   if (alpha > 0) {
00415     p3m.params.alpha   = alpha;
00416     p3m.params.alpha_L = alpha*box_l[0];
00417   }
00418   else
00419     if (alpha != -1.0)
00420       return -4;
00421 
00422   if (accuracy >= 0)
00423     p3m.params.accuracy = accuracy;
00424   else
00425     if (accuracy != -1.0)
00426       return -5;
00427 
00428   mpi_bcast_coulomb_params();
00429 
00430   return 0;
00431 }
00432 
00433 
00434 int p3m_set_mesh_offset(double x, double y, double z)
00435 {
00436   if(x < 0.0 || x > 1.0 ||
00437      y < 0.0 || y > 1.0 ||
00438      z < 0.0 || z > 1.0 )
00439     return ES_ERROR;
00440 
00441   p3m.params.mesh_off[0] = x;
00442   p3m.params.mesh_off[1] = y;
00443   p3m.params.mesh_off[2] = z;
00444 
00445   mpi_bcast_coulomb_params();
00446 
00447   return ES_OK;
00448 }
00449 
00450 
00451 
00452 int p3m_set_eps(double eps)
00453 {
00454   p3m.params.epsilon = eps;
00455 
00456   mpi_bcast_coulomb_params();
00457 
00458   return ES_OK;
00459 }
00460 
00461 
00462 
00463 int p3m_set_ninterpol(int n)
00464 {
00465   if (n < 0)
00466     return ES_ERROR;
00467 
00468   p3m.params.inter = n;
00469 
00470   mpi_bcast_coulomb_params();
00471 
00472   return ES_OK;
00473 }
00474 
00475 
00476 /************************************* method ********************************/
00477 /*****************************************************************************/
00478 
00479 void p3m_interpolate_charge_assignment_function()
00480 {
00481   double dInterpol = 0.5 / (double)p3m.params.inter;
00482   int i;
00483   long j;
00484 
00485   if (p3m.params.inter == 0) return;
00486 
00487   P3M_TRACE(fprintf(stderr,"%d - interpolating (%d) the order-%d charge assignment function\n",
00488                     this_node,p3m.params.inter,p3m.params.cao));
00489 
00490   p3m.params.inter2 = 2*p3m.params.inter + 1;
00491 
00492   for (i=0; i < p3m.params.cao; i++) {
00493     /* allocate memory for interpolation array */
00494     p3m.int_caf[i] = (double *) realloc(p3m.int_caf[i], sizeof(double)*(2*p3m.params.inter+1));
00495 
00496     /* loop over all interpolation points */
00497     for (j=-p3m.params.inter; j<=p3m.params.inter; j++)
00498       p3m.int_caf[i][j+p3m.params.inter] = p3m_caf(i, j*dInterpol,p3m.params.cao);
00499   }
00500   
00501 }
00502 
00503 /* assign the charges */
00504 void p3m_charge_assign()
00505 {
00506   Cell *cell;
00507   Particle *p;
00508   int i,c,np;
00509   /* charged particle counter, charge fraction counter */
00510   int cp_cnt=0;
00511   /* prepare local FFT mesh */
00512   for(i=0; i<p3m.local_mesh.size; i++) p3m.rs_mesh[i] = 0.0;
00513 
00514   for (c = 0; c < local_cells.n; c++) {
00515     cell = local_cells.cell[c];
00516     p  = cell->part;
00517     np = cell->n;
00518     for(i = 0; i < np; i++) {
00519       if( p[i].p.q != 0.0
00520           ) {
00521         p3m_assign_charge(p[i].p.q, p[i].r.p, cp_cnt);
00522         cp_cnt++;
00523       }
00524     }
00525   }
00526 #ifdef P3M_STORE_CA_FRAC
00527   p3m_shrink_wrap_charge_grid(cp_cnt);
00528 #endif
00529 }
00530 
00531 void p3m_assign_charge(double q,
00532                        double real_pos[3],
00533                        int cp_cnt)
00534 {
00535   int d, i0, i1, i2;
00536   double tmp0, tmp1;
00537   /* position of a particle in local mesh units */
00538   double pos;
00539   /* 1d-index of nearest mesh point */
00540   int nmp;
00541   /* distance to nearest mesh point */
00542   double dist[3];
00543   /* index for caf interpolation grid */
00544   int arg[3];
00545   /* index, index jumps for rs_mesh array */
00546   int q_ind = 0;
00547   double cur_ca_frac_val;
00548 #ifdef P3M_STORE_CA_FRAC
00549   double *cur_ca_frac;
00550 
00551   // make sure we have enough space
00552   if (cp_cnt >= p3m.ca_num) p3m_realloc_ca_fields(cp_cnt + 1);
00553   // do it here, since p3m_realloc_ca_fields may change the address of p3m.ca_frac
00554   cur_ca_frac = p3m.ca_frac + p3m.params.cao3*cp_cnt;
00555 #endif
00556 
00557   for(d=0;d<3;d++) {
00558     /* particle position in mesh coordinates */
00559     pos    = ((real_pos[d]-p3m.local_mesh.ld_pos[d])*p3m.params.ai[d]) - p3m.pos_shift;
00560     /* nearest mesh point */
00561     nmp  = (int)pos;
00562     /* 3d-array index of nearest mesh point */
00563     q_ind = (d == 0) ? nmp : nmp + p3m.local_mesh.dim[d]*q_ind;
00564     
00565     if (p3m.params.inter == 0)
00566       /* distance to nearest mesh point */
00567       dist[d] = (pos-nmp)-0.5;
00568     else
00569       /* distance to nearest mesh point for interpolation */
00570       arg[d] = (int) ((pos - nmp)*p3m.params.inter2);
00571 
00572 #ifdef ADDITIONAL_CHECKS
00573     if( pos < -skin*p3m.params.ai[d] ) {
00574       fprintf(stderr,"%d: rs_mesh underflow! (pos %f)\n", this_node, real_pos[d]);
00575       fprintf(stderr,"%d: allowed coordinates: %f - %f\n",
00576               this_node,my_left[d] - skin, my_right[d] + skin);     
00577     }
00578     if( (nmp + p3m.params.cao) > p3m.local_mesh.dim[d] ) {
00579       fprintf(stderr,"%d: rs_mesh overflow! (pos %f, nmp=%d)\n", this_node, real_pos[d],nmp);
00580       fprintf(stderr,"%d: allowed coordinates: %f - %f\n",
00581               this_node, my_left[d] - skin, my_right[d] + skin);
00582     }
00583 #endif
00584   }
00585 
00586 #ifdef P3M_STORE_CA_FRAC
00587   if (cp_cnt >= 0) p3m.ca_fmp[cp_cnt] = q_ind;
00588 #endif
00589   
00590   if (p3m.params.inter == 0) {
00591     for(i0=0; i0<p3m.params.cao; i0++) {
00592       tmp0 = p3m_caf(i0, dist[0],p3m.params.cao);
00593       for(i1=0; i1<p3m.params.cao; i1++) {
00594         tmp1 = tmp0 * p3m_caf(i1, dist[1],p3m.params.cao);
00595         for(i2=0; i2<p3m.params.cao; i2++) {
00596           cur_ca_frac_val = q * tmp1 * p3m_caf(i2, dist[2], p3m.params.cao);
00597           p3m.rs_mesh[q_ind] += cur_ca_frac_val;
00598 #ifdef P3M_STORE_CA_FRAC
00599           /* store current ca frac */
00600           if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val;
00601 #endif
00602           q_ind++;
00603         }
00604         q_ind += p3m.local_mesh.q_2_off;
00605       }
00606       q_ind += p3m.local_mesh.q_21_off;
00607     }
00608   } else {
00609     for(i0=0; i0<p3m.params.cao; i0++) {
00610       tmp0 = p3m.int_caf[i0][arg[0]];
00611       for(i1=0; i1<p3m.params.cao; i1++) {
00612         tmp1 = tmp0 * p3m.int_caf[i1][arg[1]];
00613         for(i2=0; i2<p3m.params.cao; i2++) {
00614           cur_ca_frac_val = q * tmp1 * p3m.int_caf[i2][arg[2]];
00615           p3m.rs_mesh[q_ind] += cur_ca_frac_val;
00616 #ifdef P3M_STORE_CA_FRAC
00617           /* store current ca frac */
00618           if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val;
00619 #endif
00620           q_ind++;
00621         }
00622         q_ind += p3m.local_mesh.q_2_off;
00623       }
00624       q_ind += p3m.local_mesh.q_21_off;
00625     }
00626   }
00627 }
00628 
00629 #ifdef P3M_STORE_CA_FRAC
00630 /** shrink wrap the charge grid */
00631 void p3m_shrink_wrap_charge_grid(int n_charges) {
00632   /* we do not really want to export these */
00633   if( n_charges < p3m.ca_num ) p3m_realloc_ca_fields(n_charges);
00634 }
00635 #endif
00636 
00637 /* assign the forces obtained from k-space */
00638 static void P3M_assign_forces(double force_prefac, int d_rs) 
00639 {
00640   Cell *cell;
00641   Particle *p;
00642   int i,c,np,i0,i1,i2;
00643   double q;
00644 #ifdef ONEPART_DEBUG
00645   double db_fsum=0.0; /* TODO: db_fsum was missing and code couldn't compile. Now it has the arbitrary value of 0, fix it. */ 
00646 #endif
00647   /* charged particle counter, charge fraction counter */
00648   int cp_cnt=0;
00649 #ifdef P3M_STORE_CA_FRAC
00650   int cf_cnt=0;
00651 #else
00652   /* distance to nearest mesh point */
00653   double dist[3];
00654   /* index for caf interpolation grid */
00655   int arg[3];
00656 #endif
00657   /* index, index jumps for rs_mesh array */
00658   int q_ind = 0;
00659 
00660   for (c = 0; c < local_cells.n; c++) {
00661     cell = local_cells.cell[c];
00662     p  = cell->part;
00663     np = cell->n;
00664     for(i=0; i<np; i++) { 
00665       if( (q=p[i].p.q) != 0.0 ) {
00666 #ifdef P3M_STORE_CA_FRAC
00667         q_ind = p3m.ca_fmp[cp_cnt];
00668         for(i0=0; i0<p3m.params.cao; i0++) {
00669           for(i1=0; i1<p3m.params.cao; i1++) {
00670             for(i2=0; i2<p3m.params.cao; i2++) {
00671               p[i].f.f[d_rs] -= force_prefac*p3m.ca_frac[cf_cnt]*p3m.rs_mesh[q_ind]; 
00672               q_ind++;
00673               cf_cnt++;
00674             }
00675             q_ind += p3m.local_mesh.q_2_off;
00676           }
00677           q_ind += p3m.local_mesh.q_21_off;
00678         }
00679         cp_cnt++;
00680 #else
00681         double pos;
00682         int nmp;
00683         double tmp0,tmp1;
00684         double cur_ca_frac_val;
00685         for(int d=0;d<3;d++) {
00686           /* particle position in mesh coordinates */
00687           pos    = ((p[i].r.p[d]-p3m.local_mesh.ld_pos[d])*p3m.params.ai[d]) - p3m.pos_shift;
00688           /* nearest mesh point */
00689           nmp  = (int)pos;
00690           /* 3d-array index of nearest mesh point */
00691           q_ind = (d == 0) ? nmp : nmp + p3m.local_mesh.dim[d]*q_ind;
00692 
00693           if (p3m.params.inter == 0)
00694             /* distance to nearest mesh point */
00695             dist[d] = (pos-nmp)-0.5;
00696           else
00697             /* distance to nearest mesh point for interpolation */
00698             arg[d] = (int)((pos-nmp)*p3m.params.inter2);
00699         }
00700 
00701         if (p3m.params.inter == 0) {
00702           for(i0=0; i0<p3m.params.cao; i0++) {
00703             tmp0 = p3m_caf(i0, dist[0],p3m.params.cao);
00704             for(i1=0; i1<p3m.params.cao; i1++) {
00705               tmp1 = tmp0 * p3m_caf(i1, dist[1],p3m.params.cao);
00706               for(i2=0; i2<p3m.params.cao; i2++) {
00707                 cur_ca_frac_val = q * tmp1 * p3m_caf(i2, dist[2], p3m.params.cao);
00708                 p[i].f.f[d_rs] -= force_prefac*cur_ca_frac_val*p3m.rs_mesh[q_ind];
00709                 q_ind++;
00710               }
00711               q_ind += p3m.local_mesh.q_2_off;
00712             }
00713             q_ind += p3m.local_mesh.q_21_off;
00714           }
00715         } else {
00716           for(i0=0; i0<p3m.params.cao; i0++) {
00717             tmp0 = p3m.int_caf[i0][arg[0]];
00718             for(i1=0; i1<p3m.params.cao; i1++) {
00719               tmp1 = tmp0 * p3m.int_caf[i1][arg[1]];
00720               for(i2=0; i2<p3m.params.cao; i2++) {
00721                 cur_ca_frac_val = q * tmp1 * p3m.int_caf[i2][arg[2]];
00722                 p[i].f.f[d_rs] -= force_prefac*cur_ca_frac_val*p3m.rs_mesh[q_ind];
00723                 q_ind++;
00724               }
00725               q_ind += p3m.local_mesh.q_2_off;
00726             }
00727             q_ind += p3m.local_mesh.q_21_off;
00728           }
00729         }
00730 #endif
00731 
00732         ONEPART_TRACE(if(p[i].p.identity==check_id) fprintf(stderr,"%d: OPT: P3M  f = (%.3e,%.3e,%.3e) in dir %d add %.5f\n",this_node,p[i].f.f[0],p[i].f.f[1],p[i].f.f[2],d_rs,-db_fsum));
00733       }
00734     }
00735   }
00736 }
00737 
00738 
00739 
00740 double p3m_calc_kspace_forces(int force_flag, int energy_flag)
00741 {
00742     int i,d,d_rs,ind,j[3];
00743     /**************************************************************/
00744     /* Prefactor for force */
00745     double force_prefac;
00746     /* k space energy */
00747     double k_space_energy=0.0, node_k_space_energy=0.0;
00748     /* directions */
00749     double *d_operator = NULL;
00750 
00751     P3M_TRACE(fprintf(stderr,"%d: p3m_perform: \n",this_node));
00752 //     fprintf(stderr, "calculating kspace forces\n");
00753 
00754     force_prefac = coulomb.prefactor / ( 2 * box_l[0] * box_l[1] * box_l[2] );
00755 
00756     /* Gather information for FFT grid inside the nodes domain (inner local mesh) */
00757     /* and Perform forward 3D FFT (Charge Assignment Mesh). */
00758     if (p3m.sum_q2 > 0) {
00759         p3m_gather_fft_grid(p3m.rs_mesh);
00760         fft_perform_forw(p3m.rs_mesh);
00761     }
00762     //Note: after these calls, the grids are in the order yzx and not xyz anymore!!!
00763 
00764     /* === K Space Calculations === */
00765     P3M_TRACE(fprintf(stderr,"%d: p3m_perform: k-Space\n",this_node));
00766 
00767     /* === K Space Energy Calculation  === */
00768 //     if(energy_flag && p3m.sum_q2 > 0) {
00769     if (energy_flag) {
00770         /*********************
00771         Coulomb energy
00772         **********************/
00773 
00774 
00775       for(i=0;i<fft.plan[3].new_size;i++) {
00776         // Use the energy optimized influence function for energy!
00777         node_k_space_energy += p3m.g_energy[i] * ( SQR(p3m.rs_mesh[2*i]) + SQR(p3m.rs_mesh[2*i+1]) );
00778       }
00779         node_k_space_energy *= force_prefac;
00780 
00781         MPI_Reduce(&node_k_space_energy, &k_space_energy, 1, MPI_DOUBLE, MPI_SUM, 0, comm_cart);
00782         if(this_node==0) {
00783             /* self energy correction */
00784             k_space_energy -= coulomb.prefactor*(p3m.sum_q2 * p3m.params.alpha * wupii);
00785             /* net charge correction */
00786             k_space_energy -= coulomb.prefactor* p3m.square_sum_q * PI / (2.0*box_l[0]*box_l[1]*box_l[2]*SQR(p3m.params.alpha));
00787         }
00788 
00789     } /* if (energy_flag) */
00790 
00791     /* === K Space Force Calculation  === */
00792     if(force_flag && p3m.sum_q2 > 0) {
00793        /***************************
00794         COULOMB FORCES (k-space)
00795         ****************************/
00796         /* Force preparation */
00797         ind = 0;
00798         /* apply the influence function */
00799         for(i=0; i<fft.plan[3].new_size; i++) {
00800             p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; ind++;
00801             p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; ind++;
00802         } 
00803 
00804         /* === 3 Fold backward 3D FFT (Force Component Meshs) === */
00805 
00806         /* Force component loop */
00807         for(d=0;d<3;d++) {
00808             if (d == KX)
00809                 d_operator = p3m.d_op[RX];
00810             else if (d == KY)
00811                 d_operator = p3m.d_op[RY];
00812             else if (d == KZ)
00813                 d_operator = p3m.d_op[RZ];
00814 
00815             /* direction in k space: */
00816             d_rs = (d+p3m.ks_pnum)%3;
00817             /* srqt(-1)*k differentiation */
00818             ind=0;
00819             for(j[0]=0; j[0]<fft.plan[3].new_mesh[0]; j[0]++) {
00820                 for(j[1]=0; j[1]<fft.plan[3].new_mesh[1]; j[1]++) {
00821                     for(j[2]=0; j[2]<fft.plan[3].new_mesh[2]; j[2]++) {
00822                         /* i*k*(Re+i*Im) = - Im*k + i*Re*k     (i=sqrt(-1)) */
00823                         p3m.rs_mesh[ind] = -2.0*PI*(p3m.ks_mesh[ind+1] * d_operator[ j[d]+fft.plan[3].start[d] ])/box_l[d_rs]; 
00824                         ind++;
00825                         p3m.rs_mesh[ind] =   2.0*PI*p3m.ks_mesh[ind-1] * d_operator[ j[d]+fft.plan[3].start[d] ]/box_l[d_rs];  
00826                         ind++;
00827                     }
00828                 }
00829             }
00830             fft_perform_back(p3m.rs_mesh);              /* Back FFT force component mesh */
00831             p3m_spread_force_grid(p3m.rs_mesh);             /* redistribute force component mesh */
00832             P3M_assign_forces(force_prefac, d_rs);  /* Assign force component from mesh to particle */
00833         }
00834     } /* if(force_flag) */
00835 
00836     if (p3m.params.epsilon != P3M_EPSILON_METALLIC) {
00837       k_space_energy += p3m_calc_dipole_term(force_flag, energy_flag);
00838     }
00839 
00840     return k_space_energy;
00841 }
00842 
00843 
00844 /************************************************************/
00845 
00846 double p3m_calc_dipole_term(int force_flag, int energy_flag)
00847 {
00848   int np, c, i, j;
00849   Particle *part;
00850   double pref = coulomb.prefactor*4*M_PI*box_l_i[0]*box_l_i[1]*box_l_i[2]/(2*p3m.params.epsilon + 1);
00851   double lcl_dm[3], gbl_dm[3];
00852   double en;
00853 
00854   for (j = 0; j < 3; j++)
00855     lcl_dm[j] = 0;
00856 
00857   for (c = 0; c < local_cells.n; c++) {
00858     np   = local_cells.cell[c]->n;
00859     part = local_cells.cell[c]->part;
00860     for (i = 0; i < np; i++) {
00861       for (j = 0; j < 3; j++)
00862         /* dipole moment with unfolded coordinates */
00863         lcl_dm[j] += part[i].p.q*(part[i].r.p[j] + part[i].l.i[j]*box_l[j]);
00864     }
00865   }
00866   
00867   MPI_Allreduce(lcl_dm, gbl_dm, 3, MPI_DOUBLE, MPI_SUM, comm_cart);
00868 
00869   if (energy_flag)
00870     en = 0.5*pref*(SQR(gbl_dm[0]) + SQR(gbl_dm[1]) + SQR(gbl_dm[2]));
00871   else
00872     en = 0;
00873   if (force_flag) {
00874     for (j = 0; j < 3; j++)
00875       gbl_dm[j] *= pref;
00876     for (c = 0; c < local_cells.n; c++) {
00877       np   = local_cells.cell[c]->n;
00878       part = local_cells.cell[c]->part;
00879       for (i = 0; i < np; i++)
00880         for (j = 0; j < 3; j++)
00881           part[i].f.f[j] -= gbl_dm[j]*part[i].p.q;
00882     }
00883   }
00884   return en;
00885 }
00886 
00887 /************************************************************/
00888 
00889 void p3m_gather_fft_grid(double* themesh)
00890 {
00891   int s_dir,r_dir,evenodd;
00892   MPI_Status status;
00893   double *tmp_ptr;
00894 
00895   P3M_TRACE(fprintf(stderr,"%d: p3m_gather_fft_grid:\n",this_node));
00896 
00897   /* direction loop */
00898   for(s_dir=0; s_dir<6; s_dir++) {
00899     if(s_dir%2==0) r_dir = s_dir+1;
00900     else           r_dir = s_dir-1;
00901     /* pack send block */ 
00902     if(p3m.sm.s_size[s_dir]>0) 
00903       fft_pack_block(themesh, p3m.send_grid, p3m.sm.s_ld[s_dir], p3m.sm.s_dim[s_dir], p3m.local_mesh.dim, 1);
00904       
00905     /* communication */
00906     if(node_neighbors[s_dir] != this_node) {
00907       for(evenodd=0; evenodd<2;evenodd++) {
00908         if((node_pos[s_dir/2]+evenodd)%2==0) {
00909           if(p3m.sm.s_size[s_dir]>0) 
00910             MPI_Send(p3m.send_grid, p3m.sm.s_size[s_dir], MPI_DOUBLE, 
00911                      node_neighbors[s_dir], REQ_P3M_GATHER, comm_cart);
00912         }
00913         else {
00914           if(p3m.sm.r_size[r_dir]>0) 
00915             MPI_Recv(p3m.recv_grid, p3m.sm.r_size[r_dir], MPI_DOUBLE, 
00916                      node_neighbors[r_dir], REQ_P3M_GATHER, comm_cart, &status);            
00917         }
00918       }
00919     }
00920     else {
00921       tmp_ptr = p3m.recv_grid;
00922       p3m.recv_grid = p3m.send_grid;
00923       p3m.send_grid = tmp_ptr;
00924     }
00925     /* add recv block */
00926     if(p3m.sm.r_size[r_dir]>0) {
00927       p3m_add_block(p3m.recv_grid, themesh, p3m.sm.r_ld[r_dir], p3m.sm.r_dim[r_dir], p3m.local_mesh.dim); 
00928     }
00929   }
00930 }
00931 
00932 
00933 void p3m_spread_force_grid(double* themesh)
00934 {
00935   int s_dir,r_dir,evenodd;
00936   MPI_Status status;
00937   double *tmp_ptr;
00938   P3M_TRACE(fprintf(stderr,"%d: p3m_spread_force_grid:\n",this_node));
00939 
00940   /* direction loop */
00941   for(s_dir=5; s_dir>=0; s_dir--) {
00942     if(s_dir%2==0) r_dir = s_dir+1;
00943     else           r_dir = s_dir-1;
00944     /* pack send block */ 
00945     if(p3m.sm.s_size[s_dir]>0) 
00946       fft_pack_block(themesh, p3m.send_grid, p3m.sm.r_ld[r_dir], p3m.sm.r_dim[r_dir], p3m.local_mesh.dim, 1);
00947     /* communication */
00948     if(node_neighbors[r_dir] != this_node) {
00949       for(evenodd=0; evenodd<2;evenodd++) {
00950         if((node_pos[r_dir/2]+evenodd)%2==0) {
00951           if(p3m.sm.r_size[r_dir]>0) 
00952             MPI_Send(p3m.send_grid, p3m.sm.r_size[r_dir], MPI_DOUBLE, 
00953                      node_neighbors[r_dir], REQ_P3M_SPREAD, comm_cart);
00954         }
00955         else {
00956           if(p3m.sm.s_size[s_dir]>0) 
00957             MPI_Recv(p3m.recv_grid, p3m.sm.s_size[s_dir], MPI_DOUBLE, 
00958                      node_neighbors[s_dir], REQ_P3M_SPREAD, comm_cart, &status);            
00959         }
00960       }
00961     }
00962     else {
00963       tmp_ptr = p3m.recv_grid;
00964       p3m.recv_grid = p3m.send_grid;
00965       p3m.send_grid = tmp_ptr;
00966     }
00967     /* un pack recv block */
00968     if(p3m.sm.s_size[s_dir]>0) {
00969       fft_unpack_block(p3m.recv_grid, themesh, p3m.sm.s_ld[s_dir], p3m.sm.s_dim[s_dir], p3m.local_mesh.dim, 1); 
00970     }
00971   }
00972 }
00973 
00974 #ifdef P3M_STORE_CA_FRAC
00975 void p3m_realloc_ca_fields(int newsize)
00976 {
00977   newsize = ((newsize + CA_INCREMENT - 1)/CA_INCREMENT)*CA_INCREMENT;
00978   if (newsize == p3m.ca_num) return;
00979   if (newsize < CA_INCREMENT) newsize = CA_INCREMENT;
00980 
00981   P3M_TRACE(fprintf(stderr,"%d: p3m_realloc_ca_fields: old_size=%d -> new_size=%d\n",this_node,p3m.ca_num,newsize));
00982   p3m.ca_num = newsize;
00983   p3m.ca_frac = (double *)realloc(p3m.ca_frac, p3m.params.cao3*p3m.ca_num*sizeof(double));
00984   p3m.ca_fmp  = (int *)realloc(p3m.ca_fmp, p3m.ca_num*sizeof(int));
00985     
00986 } 
00987 #endif
00988 
00989 void p3m_calc_meshift(void)
00990 {
00991     int i;
00992     
00993     p3m.meshift_x = (double *) realloc(p3m.meshift_x, p3m.params.mesh[0]*sizeof(double));
00994     p3m.meshift_y = (double *) realloc(p3m.meshift_y, p3m.params.mesh[1]*sizeof(double));
00995     p3m.meshift_z = (double *) realloc(p3m.meshift_z, p3m.params.mesh[2]*sizeof(double));
00996 
00997     p3m.meshift_x[0] = p3m.meshift_y[0] = p3m.meshift_z[0] = 0;
00998     for (i = 1; i <= p3m.params.mesh[RX]/2; i++) {
00999         p3m.meshift_x[i] = i;
01000         p3m.meshift_x[p3m.params.mesh[0] - i] = -i;
01001     }
01002 
01003     for (i = 1; i <= p3m.params.mesh[RY]/2; i++) {
01004         p3m.meshift_y[i] = i;
01005         p3m.meshift_y[p3m.params.mesh[1] - i] = -i;
01006     }
01007 
01008     for (i = 1; i <= p3m.params.mesh[RZ]/2; i++) {
01009         p3m.meshift_z[i] = i;
01010         p3m.meshift_z[p3m.params.mesh[2] - i] = -i;
01011     }
01012     
01013 }
01014 
01015 
01016 
01017 void p3m_calc_differential_operator()
01018 {
01019   int i,j;
01020 
01021   for(i=0;i<3;i++) {
01022     p3m.d_op[i] = realloc(p3m.d_op[i], p3m.params.mesh[i]*sizeof(double));
01023     p3m.d_op[i][0] = 0;
01024     p3m.d_op[i][p3m.params.mesh[i]/2] = 0.0;
01025 
01026     for(j = 1; j < p3m.params.mesh[i]/2; j++) {
01027       p3m.d_op[i][j] = j;
01028       p3m.d_op[i][p3m.params.mesh[i] - j] = -j;
01029     }
01030   }
01031 }
01032 
01033 void p3m_calc_influence_function_force()
01034 {
01035     int i, n[3], ind;
01036     int end[3];
01037     int size = 1;
01038     double fak1,fak2,fak3;
01039     double nominator[3] = {0.0, 0.0, 0.0}, denominator = 0.0;
01040 
01041     p3m_calc_meshift();
01042 
01043     for(i=0;i<3;i++) {
01044         size *= fft.plan[3].new_mesh[i];
01045         end[i] = fft.plan[3].start[i] + fft.plan[3].new_mesh[i];
01046     }
01047     p3m.g_force = (double *) realloc(p3m.g_force, size*sizeof(double));
01048 
01049     for(n[0]=fft.plan[3].start[0]; n[0]<end[0]; n[0]++) {
01050         for(n[1]=fft.plan[3].start[1]; n[1]<end[1]; n[1]++) {
01051             for(n[2]=fft.plan[3].start[2]; n[2]<end[2]; n[2]++) {
01052                 ind = (n[2]-fft.plan[3].start[2])
01053                     + fft.plan[3].new_mesh[2] * ((n[1]-fft.plan[3].start[1])
01054                     + (fft.plan[3].new_mesh[1]*(n[0]-fft.plan[3].start[0])));
01055 
01056                 if( (n[KX]%(p3m.params.mesh[RX]/2)==0) && (n[KY]%(p3m.params.mesh[RY]/2)==0) && (n[KZ]%(p3m.params.mesh[RZ]/2)==0) ) {
01057                     p3m.g_force[ind] = 0.0;
01058                 }
01059                 else {
01060                     denominator = p3m_perform_aliasing_sums_force(n,nominator);
01061 
01062                     fak1 =  p3m.d_op[RX][n[KX]]*nominator[RX]/box_l[RX] + p3m.d_op[RY][n[KY]]*nominator[RY]/box_l[RY] + p3m.d_op[RZ][n[KZ]]*nominator[RZ]/box_l[RZ];
01063                     fak2 = SQR(p3m.d_op[RX][n[KX]]/box_l[RX])+SQR(p3m.d_op[RY][n[KY]]/box_l[RY])+SQR(p3m.d_op[RZ][n[KZ]]/box_l[RZ]);
01064 
01065                     fak3 = fak1/(fak2 * SQR(denominator));
01066                     p3m.g_force[ind] = 2*fak3/(PI);
01067                 }
01068             }
01069         }
01070     }
01071 }
01072 
01073 
01074 double p3m_perform_aliasing_sums_force(int n[3], double numerator[3])
01075 {
01076     int i;
01077     double denominator = 0.0;
01078     /* lots of temporary variables... */
01079     double sx, sy, sz, f1, f2, mx, my, mz, nmx, nmy, nmz, nm2, expo;
01080     double limit = 30;
01081 
01082     for(i = 0; i < 3; i++)
01083         numerator[i] = 0.0;
01084 
01085     f1 = SQR(PI/(p3m.params.alpha));
01086 
01087     for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) {
01088         nmx = p3m.meshift_x[n[KX]] + p3m.params.mesh[RX]*mx;
01089         sx  = pow(sinc(nmx/(double)p3m.params.mesh[RX]),2.0*p3m.params.cao);
01090         for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) {
01091             nmy = p3m.meshift_y[n[KY]] + p3m.params.mesh[RY]*my;
01092             sy  = sx*pow(sinc(nmy/(double)p3m.params.mesh[RY]),2.0*p3m.params.cao);
01093             for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) {
01094                 nmz = p3m.meshift_z[n[KZ]] + p3m.params.mesh[RZ]*mz;
01095                 sz  = sy*pow(sinc(nmz/(double)p3m.params.mesh[RZ]),2.0*p3m.params.cao);
01096 
01097                 nm2          =  SQR(nmx/box_l[RX]) + SQR(nmy/box_l[RY]) + SQR(nmz/box_l[RZ]);
01098                 expo         =  f1*nm2;
01099                 f2           =  (expo<limit) ? sz*exp(-expo)/nm2 : 0.0;
01100 
01101                 numerator[RX] += f2*nmx/box_l[RX];
01102                 numerator[RY] += f2*nmy/box_l[RY];
01103                 numerator[RZ] += f2*nmz/box_l[RZ];
01104 
01105                 denominator  += sz;
01106             }
01107         }
01108     }
01109     return denominator;
01110 }
01111 
01112 void p3m_calc_influence_function_energy()
01113 {
01114     int i,n[3],ind;
01115     int end[3]; int start[3];
01116     int size=1;
01117 
01118     p3m_calc_meshift();
01119 
01120     for(i = 0; i < 3; i++) {
01121       size *= fft.plan[3].new_mesh[i];
01122       end[i] = fft.plan[3].start[i] + fft.plan[3].new_mesh[i];
01123       start[i] = fft.plan[3].start[i];
01124     }
01125 
01126     p3m.g_energy = (double *) realloc(p3m.g_energy, size*sizeof(double));
01127     ind = 0;
01128 
01129 
01130 
01131    for(n[0]=start[0]; n[0]<end[0]; n[0]++) {
01132         for(n[1]=start[1]; n[1]<end[1]; n[1]++) {
01133             for(n[2]=start[2]; n[2]<end[2]; n[2]++) {
01134                 ind = (n[2]-start[2])
01135                     + fft.plan[3].new_mesh[2] * (n[1]-start[1])
01136                     + fft.plan[3].new_mesh[2] * fft.plan[3].new_mesh[1]*(n[0]-start[0]);
01137                 if( (n[KX]%(p3m.params.mesh[RX]/2)==0) && (n[KY]%(p3m.params.mesh[RY]/2)==0) && (n[KZ]%(p3m.params.mesh[RZ]/2)==0) ) {
01138                     p3m.g_energy[ind] = 0.0;
01139                 }
01140 
01141                 else 
01142                   p3m.g_energy[ind] = p3m_perform_aliasing_sums_energy(n)/PI;
01143             }
01144         }
01145     }
01146 }
01147 
01148 double p3m_perform_aliasing_sums_energy(int n[3])
01149 {
01150     double numerator=0.0, denominator=0.0;
01151     /* lots of temporary variables... */
01152     double sx, sy, sz, f1, f2, mx, my, mz, nmx, nmy, nmz, nm2, expo;
01153     double limit = 30;
01154 
01155     f1 = SQR(PI/(p3m.params.alpha));
01156 
01157     for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) {
01158         nmx = p3m.meshift_x[n[KX]] + p3m.params.mesh[RX]*mx;
01159         sx  = pow(sinc(nmx/(double)p3m.params.mesh[RX]),2.0*p3m.params.cao);
01160         for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) {
01161             nmy = p3m.meshift_y[n[KY]] + p3m.params.mesh[RY]*my;
01162             sy  = sx*pow(sinc(nmy/(double)p3m.params.mesh[RY]),2.0*p3m.params.cao);
01163             for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) {
01164                 nmz = p3m.meshift_z[n[KZ]] + p3m.params.mesh[RZ]*mz;
01165                 sz  = sy*pow(sinc(nmz/(double)p3m.params.mesh[RZ]),2.0*p3m.params.cao);
01166                 /* k = 2*pi * (nx/lx, ny/ly, nz/lz); expo = -k^2 / 4*alpha^2 */
01167                 nm2          =  SQR(nmx/box_l[RX]) + SQR(nmy/box_l[RY]) + SQR(nmz/box_l[RZ]);
01168                 expo         =  f1*nm2;
01169                 f2           =  (expo<limit) ? sz*exp(-expo)/nm2 : 0.0;
01170 
01171                 numerator += f2;
01172                 denominator  += sz;
01173             }
01174         }
01175     }
01176 
01177     return numerator/SQR(denominator);
01178 }
01179 
01180 
01181 
01182 /************************************************
01183  * Functions for P3M Parameter tuning
01184  * This tuning is based on P3M_tune by M. Deserno
01185  ************************************************/
01186 
01187 #define P3M_TUNE_MAX_CUTS 50
01188 
01189 /** get the minimal error for this combination of parameters. In fact,
01190     the real space error is tuned such that it contributes half of the
01191     total error, and then the Fourier space error is
01192     calculated. Returns the error and the optimal alpha, or 0 if this
01193     combination does not work at all */
01194 static double p3m_get_accuracy(int mesh[3], int cao, double r_cut_iL, double *_alpha_L, double *_rs_err, double *_ks_err)
01195 {
01196    double rs_err, ks_err;
01197    double alpha_L;
01198    P3M_TRACE(fprintf(stderr, "p3m_get_accuracy: mesh (%d, %d, %d), cao %d, r_cut %f ", mesh[0], mesh[1], mesh[2], cao, r_cut_iL));
01199  
01200    /* calc maximal real space error for setting */
01201    rs_err = p3m_real_space_error(coulomb.prefactor,r_cut_iL,p3m.sum_qpart,p3m.sum_q2,0);
01202  
01203    if(M_SQRT2*rs_err > p3m.params.accuracy) {
01204    /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
01205      alpha_L = sqrt(log(M_SQRT2*rs_err/p3m.params.accuracy)) / r_cut_iL;
01206    }
01207    else {
01208     /* even alpha=0 is ok, however, we cannot choose it since it kills the k-space error formula.
01209        Anyways, this very likely NOT the optimal solution */
01210     alpha_L = 0.1;
01211   }
01212 
01213   *_alpha_L = alpha_L;
01214   /* calculate real space and k space error for this alpha_L */
01215   rs_err = p3m_real_space_error(coulomb.prefactor,r_cut_iL,p3m.sum_qpart,p3m.sum_q2,alpha_L);
01216   ks_err = p3m_k_space_error(coulomb.prefactor,mesh,cao,p3m.sum_qpart,p3m.sum_q2,alpha_L);
01217 
01218   *_rs_err = rs_err;
01219   *_ks_err = ks_err;
01220   P3M_TRACE(fprintf(stderr, "resulting: alpha_L %g -> rs_err: %g, ks_err %g, total_err %g\n", alpha_L, rs_err, ks_err,sqrt(SQR(rs_err)+SQR(ks_err))));
01221   return sqrt(SQR(rs_err)+SQR(ks_err));
01222 }
01223 
01224 /** get the optimal alpha and the corresponding computation time for fixed mesh, cao, r_cut and alpha */
01225 static double p3m_mcr_time(int mesh[3], int cao, double r_cut_iL, double alpha_L)
01226 {
01227   /* rounded up 2000/n_charges timing force evaluations */
01228   int int_num = (1999 + p3m.sum_qpart)/p3m.sum_qpart;
01229   double int_time;
01230 
01231   /* broadcast p3m parameters for test run */
01232   if (coulomb.method != COULOMB_P3M && coulomb.method != COULOMB_ELC_P3M)
01233     coulomb.method = COULOMB_P3M;
01234   p3m.params.r_cut_iL = r_cut_iL;
01235   p3m.params.mesh[0] = mesh[0];
01236   p3m.params.mesh[1] = mesh[1];
01237   p3m.params.mesh[2] = mesh[2];
01238   p3m.params.cao      = cao;
01239   p3m.params.alpha_L  = alpha_L;
01240   p3m_scaleby_box_l();
01241   /* initialize p3m structures */
01242   mpi_bcast_coulomb_params();
01243   /* perform force calculation test */
01244   int_time = time_force_calc(int_num);
01245   P3M_TRACE(fprintf(stderr, "%d: test integration with mesh (%d %d %d), r_cut_iL %lf, cao %d, alpha_L %lf returned %lf.\n", this_node, mesh[0], mesh[1], mesh[2], r_cut_iL, cao, alpha_L, int_time));
01246   return int_time;
01247 }
01248 
01249 /** get the optimal alpha and the corresponding computation time for fixed mesh, cao. The r_cut is determined via
01250     a simple bisection. Returns -1 if the force evaluation does not work, -2 if there is no valid r_cut, and -3 if
01251     the charge assigment order is to large for this grid */
01252 static double p3m_mc_time(char **log, int mesh[3], int cao,
01253                           double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL,
01254                           double *_alpha_L, double *_accuracy)
01255 {
01256   double int_time;
01257   double r_cut_iL;
01258   double rs_err, ks_err, mesh_size, k_cut;
01259   int i, n_cells;
01260   char b[5*ES_DOUBLE_SPACE + 3*ES_INTEGER_SPACE + 128];
01261 
01262   /* initial checks. */
01263   mesh_size = box_l[0]/(double)mesh[0];
01264   k_cut =  mesh_size*cao/2.0;
01265   P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh=(%d, %d, %d), cao=%d, rmin=%f, rmax=%f\n",
01266                     mesh[0],mesh[1],mesh[2], cao, r_cut_iL_min, r_cut_iL_max));
01267   if(cao >= imin(mesh[0],imin(mesh[1],mesh[2])) || k_cut >= (dmin(min_box_l,min_local_box_l) - skin)) {
01268     sprintf(b, "%-4d %-3d cao too large for this mesh\n", mesh[0], cao);
01269     *log = strcat_alloc(*log, b);
01270     return -3;
01271   }
01272 
01273   /* Either low and high boundary are equal (for fixed cut), or the low border is initially 0 and therefore
01274      has infinite error estimate, as required. Therefore if the high boundary fails, there is no possible r_cut */
01275   if ((*_accuracy = p3m_get_accuracy(mesh, cao, r_cut_iL_max, _alpha_L, &rs_err, &ks_err)) > p3m.params.accuracy) {
01276     /* print result */
01277     P3M_TRACE(puts("p3m_mc_time: accuracy not achieved."));
01278     sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e accuracy not achieved\n",
01279             mesh[0], cao, r_cut_iL_max, *_alpha_L, *_accuracy, rs_err, ks_err);
01280     *log = strcat_alloc(*log, b);
01281     return -2;
01282   }
01283 
01284   for (;;) {
01285     P3M_TRACE(fprintf(stderr, "p3m_mc_time: interval [%f,%f]\n", r_cut_iL_min, r_cut_iL_max));
01286     r_cut_iL = 0.5*(r_cut_iL_min + r_cut_iL_max);
01287 
01288     if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC)
01289       break;
01290 
01291     /* bisection */
01292     if ((p3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err) > p3m.params.accuracy))
01293       r_cut_iL_min = r_cut_iL;
01294     else
01295       r_cut_iL_max = r_cut_iL;
01296   }
01297 
01298   /* final result is always the upper interval boundary, since only there
01299      we know that the desired minimal accuracy is obtained */
01300   *_r_cut_iL = r_cut_iL = r_cut_iL_max;
01301 
01302   /* check whether we are running P3M+ELC, and whether we leave a reasonable gap space */
01303   if (coulomb.method == COULOMB_ELC_P3M && elc_params.gap_size <= 1.1*r_cut_iL*box_l[0]) {
01304     P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh (%d, %d, %d) cao %d r_cut %f reject r_cut %f > gap %f\n", mesh[0],mesh[1],mesh[2], cao, r_cut_iL,
01305                      2*r_cut_iL*box_l[0], elc_params.gap_size));
01306     /* print result */
01307     sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e conflict with ELC\n",
01308             mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err);
01309     *log = strcat_alloc(*log, b);
01310     return -P3M_TUNE_ELCTEST;
01311   }
01312 
01313   /* check whether this radius is too large, so that we would use less cells than allowed */
01314   n_cells = 1;
01315   for (i = 0; i < 3; i++)
01316     n_cells *= (int)(floor(local_box_l[i]/(r_cut_iL*box_l[0] + skin)));
01317   if (n_cells < min_num_cells) {
01318     P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh (%d, %d, %d) cao %d r_cut %f reject n_cells %d\n", mesh[0], mesh[1], mesh[2], cao, r_cut_iL, n_cells));
01319     /* print result */
01320     sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e radius dangerously high\n\n",
01321             mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err);
01322     *log = strcat_alloc(*log, b);
01323   }
01324   int_time = p3m_mcr_time(mesh, cao, r_cut_iL, *_alpha_L);
01325   if (int_time == -1) {
01326     *log = strcat_alloc(*log, "tuning failed, test integration not possible\n");
01327     return -P3M_TUNE_FAIL;
01328   }
01329 
01330   *_accuracy = p3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err);
01331 
01332   P3M_TRACE(fprintf(stderr, "p3m_mc_time: mesh (%d, %d, %d) cao %d r_cut %f time %f\n", mesh[0], mesh[1], mesh[2], cao, r_cut_iL, int_time));
01333   /* print result */
01334   sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e %-8d\n",
01335           mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err, (int)int_time);
01336   *log = strcat_alloc(*log, b);
01337   return int_time;
01338 }
01339 
01340 /** get the optimal alpha and the corresponding computation time for fixed mesh. *cao
01341     should contain an initial guess, which is then adapted by stepping up and down. Returns the time
01342     upon completion, -1 if the force evaluation does not work, and -2 if the accuracy cannot be met */
01343 static double p3m_m_time(char **log, int mesh[3],
01344                         int cao_min, int cao_max, int *_cao,
01345                         double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL,
01346                         double *_alpha_L, double *_accuracy)
01347 {
01348   double best_time = -1, tmp_time, tmp_r_cut_iL=0.0, tmp_alpha_L=0.0, tmp_accuracy=0.0;
01349   /* in which direction improvement is possible. Initially, we dont know it yet. */
01350   int final_dir = 0;
01351   int cao = *_cao;
01352 
01353   P3M_TRACE(fprintf(stderr, "p3m_m_time: mesh=(%d, %d %d), cao_min=%d, cao_max=%d, rmin=%f, rmax=%f\n",
01354                    mesh[0],mesh[1],mesh[2], cao_min, cao_max, r_cut_iL_min, r_cut_iL_max));
01355   /* the initial step sets a timing mark. If there is no valid r_cut, we can only try
01356      to increase cao to increase the obtainable precision of the far formula. */
01357   do {
01358     tmp_time = p3m_mc_time(log, mesh, cao,  r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy);
01359     /* bail out if the force evaluation is not working */
01360     if (tmp_time == -1) return -1;
01361     /* cao is too large for this grid, but still the accuracy cannot be achieved, give up */
01362     if (tmp_time == -3) {
01363       P3M_TRACE(fprintf(stderr, "p3m_m_time: no possible cao found\n"));
01364       return -2;
01365     }
01366     /* we have a valid time, start optimising from there */
01367     if (tmp_time >= 0) {
01368       best_time  = tmp_time;
01369       *_r_cut_iL = tmp_r_cut_iL;
01370       *_alpha_L  = tmp_alpha_L;
01371       *_accuracy = tmp_accuracy;
01372       *_cao      = cao;
01373       break;
01374     }
01375     /* the required accuracy could not be obtained, try higher caos. Therefore optimisation can only be
01376        obtained with even higher caos, but not lower ones */
01377     P3M_TRACE(fprintf(stderr, "p3m_m_time: doesn't give precision, step up\n"));
01378     cao++;
01379     final_dir = 1;
01380   }
01381   while (cao <= cao_max);
01382   /* with this mesh, the required accuracy cannot be obtained. */
01383   if (cao > cao_max) return -2;
01384 
01385   /* at the boundaries, only the opposite direction can be used for optimisation */
01386   if (cao == cao_min)      final_dir = 1;
01387   else if (cao == cao_max) final_dir = -1;
01388 
01389   P3M_TRACE(fprintf(stderr, "p3m_m_time: final constraints dir %d\n", final_dir));
01390 
01391   if (final_dir == 0) {
01392     /* check in which direction we can optimise. Both directions are possible */
01393     double dir_times[3];
01394     for (final_dir = -1; final_dir <= 1; final_dir += 2) {
01395       dir_times[final_dir + 1] = tmp_time =
01396         p3m_mc_time(log, mesh, cao + final_dir,  r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy);
01397       /* bail out on errors, as usual */
01398       if (tmp_time == -1) return -P3M_TUNE_FAIL;
01399       /* in this direction, we cannot optimise, since we get into precision trouble */
01400       if (tmp_time < 0) continue;
01401 
01402       if (tmp_time < best_time) {
01403         best_time  = tmp_time;
01404         *_r_cut_iL = tmp_r_cut_iL;
01405         *_alpha_L  = tmp_alpha_L;
01406         *_accuracy = tmp_accuracy;
01407         *_cao      = cao + final_dir;
01408       }
01409     }
01410     /* choose the direction which was optimal, if any of the two */
01411     if      (dir_times[0] == best_time) { final_dir = -1; }
01412     else if (dir_times[2] == best_time) { final_dir = 1; }
01413     else {
01414       /* no improvement in either direction, however if one is only marginally worse, we can still try*/
01415       /* down is possible and not much worse, while up is either illegal or even worse */
01416       if ((dir_times[0] >= 0 && dir_times[0] < best_time + P3M_TIME_GRAN) &&
01417           (dir_times[2] < 0 || dir_times[2] > dir_times[0]))
01418         final_dir = -1;
01419       /* same for up */
01420       else if ((dir_times[2] >= 0 && dir_times[2] < best_time + P3M_TIME_GRAN) &&
01421                (dir_times[0] < 0 || dir_times[0] > dir_times[2]))
01422         final_dir = 1;
01423       else {
01424         /* really no chance for optimisation */
01425         P3M_TRACE(fprintf(stderr, "p3m_m_time: mesh=(%d, %d, %d) final cao=%d time=%f\n",mesh[0],mesh[1],mesh[2], cao, best_time));
01426         return best_time;
01427       }
01428     }
01429     /* we already checked the initial cao and its neighbor */
01430     cao += 2*final_dir;
01431   }
01432   else {
01433     /* here some constraint is active, and we only checked the initial cao itself */
01434     cao += final_dir;
01435   }
01436 
01437   P3M_TRACE(fprintf(stderr, "p3m_m_time: optimise in direction %d\n", final_dir));
01438 
01439   /* move cao into the optimisation direction until we do not gain anymore. */
01440   for (; cao >= cao_min && cao <= cao_max; cao += final_dir) {
01441     tmp_time = p3m_mc_time(log, mesh, cao,  r_cut_iL_min, r_cut_iL_max,
01442                            &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy);
01443     /* bail out on errors, as usual */
01444     if (tmp_time == -1) return -1;
01445     /* if we cannot meet the precision anymore, give up */
01446     if (tmp_time < 0) break;
01447 
01448     if (tmp_time < best_time) {
01449       best_time  = tmp_time;
01450       *_r_cut_iL = tmp_r_cut_iL;
01451       *_alpha_L  = tmp_alpha_L;
01452       *_accuracy = tmp_accuracy;
01453       *_cao      = cao;
01454     }
01455     /* no hope of further optimisation */
01456     else if (tmp_time > best_time + P3M_TIME_GRAN)
01457       break;
01458   }
01459   P3M_TRACE(fprintf(stderr, "p3m_m_time: mesh=(%d, %d, %d) final cao=%d r_cut=%f time=%f\n",mesh[0],mesh[1],mesh[2], *_cao, *_r_cut_iL, best_time));
01460   return best_time;
01461 }
01462 
01463 int p3m_adaptive_tune(char **log) {
01464   int  mesh[3] = {0, 0, 0}; 
01465   int tmp_mesh[3];
01466   double r_cut_iL_min, r_cut_iL_max, r_cut_iL = -1, tmp_r_cut_iL=0.0;
01467   int    cao_min, cao_max,           cao      = -1, tmp_cao;
01468   double                             alpha_L  = -1, tmp_alpha_L=0.0;
01469   double                             accuracy = -1, tmp_accuracy=0.0;
01470   double                            time_best=1e20, tmp_time;
01471   double mesh_density = 0.0, mesh_density_min, mesh_density_max;
01472   char b[3*ES_INTEGER_SPACE + 3*ES_DOUBLE_SPACE + 128];
01473 
01474   if (skin == -1) {
01475     *log = strcat_alloc(*log, "p3m cannot be tuned, since the skin is not yet set");
01476     return ES_ERROR;
01477   }
01478 
01479   if (p3m.params.epsilon != P3M_EPSILON_METALLIC) {
01480     if( !((box_l[0] == box_l[1]) &&
01481           (box_l[1] == box_l[2]))) {
01482       *log = strcat_alloc(*log, "{049 P3M_init: Nonmetallic epsilon requires cubic box} ");
01483       return ES_ERROR;
01484     }
01485   }
01486 
01487   /* preparation */
01488   mpi_bcast_event(P3M_COUNT_CHARGES);
01489   /* Print Status */
01490   sprintf(b, "P3M tune parameters: Accuracy goal = %.5e\n", p3m.params.accuracy);
01491   *log = strcat_alloc(*log, b);
01492   sprintf(b, "System: box_l = %.5e # charged part = %d Sum[q_i^2] = %.5e\n",
01493           box_l[0], p3m.sum_qpart, p3m.sum_q2);
01494   *log = strcat_alloc(*log, b);
01495 
01496   if (p3m.sum_qpart == 0) {
01497     *log = strcat_alloc(*log, "no charged particles in the system, cannot tune P3M");
01498     return ES_ERROR;
01499   }
01500 
01501   /* parameter ranges */
01502   /* if at least the number of meshpoints in one direction is not set, we have to tune it. */
01503   if (p3m.params.mesh[0] == 0 || p3m.params.mesh[1] == 0 || p3m.params.mesh[2] == 0) {
01504     mesh_density_min = pow(p3m.sum_qpart / (box_l[0] * box_l[1] * box_l[2]) , 1.0/3.0);
01505     mesh_density_max = 256 / pow(box_l[0] * box_l[1] * box_l[2], 1.0/3.0);
01506     /* this limits the tried meshes if the accuracy cannot
01507        be obtained with smaller meshes, but normally not all these
01508        meshes have to be tested */
01509     /* avoid using more than 1 GB of FFT arrays (per default, see config.h) */
01510 
01511     P3M_TRACE(fprintf(stderr, "%d: starting with meshdensity %lf, using at most %lf.\n", this_node, mesh_density_min, mesh_density_max));
01512 
01513   } else {
01514     mesh_density = mesh_density_min = mesh_density_max = p3m.params.mesh[0] / box_l[0];
01515 
01516     sprintf(b, "fixed mesh %d %d %d\n", p3m.params.mesh[0], p3m.params.mesh[1], p3m.params.mesh[2]);
01517     *log = strcat_alloc(*log, b);
01518   }
01519   
01520   if(p3m.params.r_cut_iL == 0.0) {
01521     r_cut_iL_min = 0;
01522     r_cut_iL_max = dmin(min_local_box_l, min_box_l/2.0) - skin;
01523     r_cut_iL_min *= box_l_i[0];
01524     r_cut_iL_max *= box_l_i[0];
01525   }
01526   else {
01527     r_cut_iL_min = r_cut_iL_max = p3m.params.r_cut_iL;
01528 
01529     sprintf(b, "fixed r_cut_iL %f\n", p3m.params.r_cut_iL);
01530     *log = strcat_alloc(*log, b);
01531   }
01532 
01533   if(p3m.params.cao == 0) {
01534     cao_min = 1;
01535     cao_max = 7;
01536     cao = 3;
01537   }
01538   else {
01539     cao_min = cao_max = cao = p3m.params.cao;
01540 
01541     sprintf(b, "fixed cao %d\n", p3m.params.cao);
01542     *log = strcat_alloc(*log, b);
01543   }
01544 
01545   *log = strcat_alloc(*log, "mesh cao r_cut_iL     alpha_L      err          rs_err     ks_err     time [ms]\n");
01546 
01547   /* mesh loop */
01548   /* we're tuning the density of mesh points, which is the same in every direction. */
01549   for (mesh_density=mesh_density_min;mesh_density<=mesh_density_max;mesh_density+=0.1) {
01550     tmp_cao = cao;
01551 
01552     P3M_TRACE(fprintf(stderr, "%d: trying meshdensity %lf.\n", this_node, mesh_density));
01553 
01554     tmp_mesh[0] = (int)(box_l[0]*mesh_density);
01555     tmp_mesh[1] = (int)(box_l[1]*mesh_density);
01556     tmp_mesh[2] = (int)(box_l[2]*mesh_density);
01557 
01558     if(tmp_mesh[0] % 2)
01559       tmp_mesh[0]++;
01560     if(tmp_mesh[1] % 2) 
01561       tmp_mesh[1]++;
01562     if(tmp_mesh[2] % 2)
01563       tmp_mesh[2]++;
01564 
01565     tmp_time = p3m_m_time(log, tmp_mesh,
01566                           cao_min, cao_max, &tmp_cao,
01567                           r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL,
01568                           &tmp_alpha_L, &tmp_accuracy); 
01569     /* some error occured during the tuning force evaluation */
01570     P3M_TRACE(fprintf(stderr,"delta_acceracy: %lf tune time: %lf\n", p3m.params.accuracy - tmp_accuracy,tmp_time));
01571     //    if (tmp_time == -1) con;
01572     /* this mesh does not work at all */
01573     if (tmp_time < 0.0) continue;
01574 
01575     /* the optimum r_cut for this mesh is the upper limit for higher meshes,
01576        everything else is slower */
01577     r_cut_iL_max = tmp_r_cut_iL;
01578 
01579     /* new optimum */
01580     if (tmp_time < time_best) {
01581       P3M_TRACE(fprintf(stderr, "Found new optimum: time %lf, mesh (%d %d %d)\n", tmp_time, tmp_mesh[0], tmp_mesh[1], tmp_mesh[2]));
01582       time_best = tmp_time;
01583       mesh[0]   = tmp_mesh[0];
01584       mesh[1]   = tmp_mesh[1];
01585       mesh[2]   = tmp_mesh[2];
01586       cao       = tmp_cao;
01587       r_cut_iL  = tmp_r_cut_iL;
01588       alpha_L   = tmp_alpha_L;
01589       accuracy  = tmp_accuracy;
01590     }
01591     /* no hope of further optimisation */
01592     else if (tmp_time > time_best + P3M_TIME_GRAN) {
01593       P3M_TRACE(fprintf(stderr, "%d: %lf is mush slower then best time, aborting.\n", this_node, tmp_time));
01594       break;
01595     }
01596   }
01597   
01598   P3M_TRACE(fprintf(stderr,"%d: finished tuning, best time: %lf\n", this_node,time_best));
01599   if(time_best == 1e20) {
01600     *log = strcat_alloc(*log, "failed to tune P3M parameters to required accuracy\n");
01601     return ES_ERROR;
01602   }
01603 
01604   /* set tuned p3m parameters */
01605   p3m.params.r_cut_iL = r_cut_iL;
01606   p3m.params.mesh[0]  = mesh[0];
01607   p3m.params.mesh[1]  = mesh[1];
01608   p3m.params.mesh[2]  = mesh[2];
01609   p3m.params.cao      = cao;
01610   p3m.params.alpha_L  = alpha_L;
01611   p3m.params.accuracy = accuracy;
01612   p3m_scaleby_box_l();
01613   /* broadcast tuned p3m parameters */
01614   P3M_TRACE(fprintf(stderr,"%d: Broadcasting P3M parameters: mesh: (%d %d %d), cao: %d, alpha_L: %lf, acccuracy: %lf\n", this_node, p3m.params.mesh[0], p3m.params.mesh[1],  p3m.params.mesh[2], p3m.params.cao, p3m.params.alpha_L, p3m.params.accuracy));
01615   mpi_bcast_coulomb_params();
01616 
01617   P3M_TRACE(p3m_print());
01618 
01619   /* Tell the user about the outcome */
01620   sprintf(b, "\nresulting parameters:\n%-4d %-3d %.5e %.5e %.5e %-8d\n",
01621           mesh[0], cao, r_cut_iL, alpha_L, accuracy, (int)time_best);
01622   *log = strcat_alloc(*log, b);
01623   return ES_OK;
01624 }
01625   
01626 void p3m_count_charged_particles()
01627 {  
01628   Cell *cell;
01629   Particle *part;
01630   int i,c,np;
01631   double node_sums[3], tot_sums[3];
01632 
01633   P3M_TRACE(fprintf(stderr,"%d: p3m_count_charged_particles\n",this_node));
01634 
01635   for(i=0;i<3;i++)
01636     { node_sums[i]=0.0; tot_sums[i]=0.0;}
01637 
01638   for (c = 0; c < local_cells.n; c++) {
01639     cell = local_cells.cell[c];
01640     part = cell->part;
01641     np   = cell->n;
01642     for(i=0;i<np;i++) {
01643       if( part[i].p.q != 0.0 ) {
01644         node_sums[0] += 1.0;
01645         node_sums[1] += SQR(part[i].p.q);
01646         node_sums[2] += part[i].p.q;
01647       }
01648     }
01649   }
01650   
01651   MPI_Allreduce(node_sums, tot_sums, 3, MPI_DOUBLE, MPI_SUM, comm_cart);
01652   p3m.sum_qpart    = (int)(tot_sums[0]+0.1);
01653   p3m.sum_q2       = tot_sums[1];
01654   p3m.square_sum_q = SQR(tot_sums[2]);
01655   
01656   P3M_TRACE(fprintf(stderr, "%d: p3m.sum_qpart: %d, p3m.sum_q2: %lf, total_charge %lf\n", this_node, p3m.sum_qpart, p3m.sum_q2, sqrt(p3m.square_sum_q)));
01657 }
01658 
01659 
01660 double p3m_real_space_error(double prefac, double r_cut_iL, 
01661                             int n_c_part, double sum_q2, double alpha_L)
01662 {
01663   return (2.0*prefac*sum_q2*exp(-SQR(r_cut_iL*alpha_L))) / (sqrt((double)n_c_part*r_cut_iL)*box_l[1]*box_l[2]);
01664 }
01665 
01666 double p3m_k_space_error(double prefac, int mesh[3], int cao, int n_c_part, double sum_q2, double alpha_L)
01667 {
01668   int  nx, ny, nz;
01669   double he_q = 0.0, mesh_i[3] = {1.0/mesh[0], 1.0/mesh[1], 1.0/mesh[2]}, alpha_L_i = 1./alpha_L;
01670   double alias1, alias2, n2, cs;
01671   double ctan_x, ctan_y;
01672 
01673   for (nx=-mesh[0]/2; nx<mesh[0]/2; nx++) {
01674     ctan_x = p3m_analytic_cotangent_sum(nx,mesh_i[0],cao);
01675     for (ny=-mesh[1]/2; ny<mesh[1]/2; ny++) {
01676       ctan_y = ctan_x * p3m_analytic_cotangent_sum(ny,mesh_i[1],cao);
01677       for (nz=-mesh[2]/2; nz<mesh[2]/2; nz++) {
01678         if((nx!=0) || (ny!=0) || (nz!=0)) {
01679           n2 = SQR(nx) + SQR(ny) + SQR(nz);
01680           cs = p3m_analytic_cotangent_sum(nz,mesh_i[2],cao)*ctan_y;
01681           p3m_tune_aliasing_sums(nx,ny,nz,mesh,mesh_i,cao,alpha_L_i,&alias1,&alias2);
01682 
01683           double d = alias1  -  SQR(alias2/cs) / n2;
01684           /* at high precisions, d can become negative due to extinction;
01685              also, don't take values that have no significant digits left*/
01686           if (d > 0 && (fabs(d/alias1) > ROUND_ERROR_PREC))
01687             he_q += d;
01688         }
01689       }
01690     }
01691   }
01692   return 2.0*prefac*sum_q2*sqrt(he_q/(double)n_c_part) / (box_l[1]*box_l[2]);
01693 }
01694 
01695 
01696 void p3m_tune_aliasing_sums(int nx, int ny, int nz, 
01697                             int mesh[3], double mesh_i[3], int cao, double alpha_L_i, 
01698                             double *alias1, double *alias2)
01699 {
01700 
01701   int    mx,my,mz;
01702   double nmx,nmy,nmz;
01703   double fnmx,fnmy,fnmz;
01704 
01705   double ex,ex2,nm2,U2,factor1;
01706 
01707   factor1 = SQR(PI*alpha_L_i);
01708 
01709   *alias1 = *alias2 = 0.0;
01710   for (mx=-P3M_BRILLOUIN; mx<=P3M_BRILLOUIN; mx++) {
01711     fnmx = mesh_i[0] * (nmx = nx + mx*mesh[0]);
01712     for (my=-P3M_BRILLOUIN; my<=P3M_BRILLOUIN; my++) {
01713       fnmy = mesh_i[1] * (nmy = ny + my*mesh[1]);
01714       for (mz=-P3M_BRILLOUIN; mz<=P3M_BRILLOUIN; mz++) {
01715         fnmz = mesh_i[2] * (nmz = nz + mz*mesh[2]);
01716 
01717         nm2 = SQR(nmx) + SQR(nmy) + SQR(nmz);
01718         ex2 = SQR( ex = exp(-factor1*nm2) );
01719         
01720         U2 = pow(sinc(fnmx)*sinc(fnmy)*sinc(fnmz), 2.0*cao);
01721         
01722         *alias1 += ex2 / nm2;
01723         *alias2 += U2 * ex * (nx*nmx + ny*nmy + nz*nmz) / nm2;
01724       }
01725     }
01726   }
01727 }
01728 
01729 
01730 
01731 
01732 /************************************************************/
01733 
01734 void p3m_calc_local_ca_mesh() {
01735   int i;
01736   int ind[3];
01737   /* total skin size */
01738   double full_skin[3];
01739 
01740   for(i=0;i<3;i++)
01741     full_skin[i]= p3m.params.cao_cut[i]+skin+p3m.params.additional_mesh[i];
01742 
01743   /* inner left down grid point (global index) */
01744   for(i=0;i<3;i++) p3m.local_mesh.in_ld[i] = (int)ceil(my_left[i]*p3m.params.ai[i]-p3m.params.mesh_off[i]);
01745   /* inner up right grid point (global index) */
01746   for(i=0;i<3;i++) p3m.local_mesh.in_ur[i] = (int)floor(my_right[i]*p3m.params.ai[i]-p3m.params.mesh_off[i]);
01747   
01748   /* correct roundof errors at boundary */
01749   for(i=0;i<3;i++) {
01750     if((my_right[i]*p3m.params.ai[i]-p3m.params.mesh_off[i])-p3m.local_mesh.in_ur[i]<ROUND_ERROR_PREC) p3m.local_mesh.in_ur[i]--;
01751     if(1.0+(my_left[i]*p3m.params.ai[i]-p3m.params.mesh_off[i])-p3m.local_mesh.in_ld[i]<ROUND_ERROR_PREC) p3m.local_mesh.in_ld[i]--;
01752   }
01753   /* inner grid dimensions */
01754   for(i=0;i<3;i++) p3m.local_mesh.inner[i] = p3m.local_mesh.in_ur[i] - p3m.local_mesh.in_ld[i] + 1;
01755   /* index of left down grid point in global mesh */
01756   for(i=0;i<3;i++) 
01757     p3m.local_mesh.ld_ind[i]=(int)ceil((my_left[i]-full_skin[i])*p3m.params.ai[i]-p3m.params.mesh_off[i]);
01758   /* left down margin */
01759   for(i=0;i<3;i++) p3m.local_mesh.margin[i*2] = p3m.local_mesh.in_ld[i]-p3m.local_mesh.ld_ind[i];
01760   /* up right grid point */
01761   for(i=0;i<3;i++) ind[i]=(int)floor((my_right[i]+full_skin[i])*p3m.params.ai[i]-p3m.params.mesh_off[i]);
01762   /* correct roundof errors at up right boundary */
01763   for(i=0;i<3;i++)
01764     if(((my_right[i]+full_skin[i])*p3m.params.ai[i]-p3m.params.mesh_off[i])-ind[i]==0) ind[i]--;
01765   /* up right margin */
01766   for(i=0;i<3;i++) p3m.local_mesh.margin[(i*2)+1] = ind[i] - p3m.local_mesh.in_ur[i];
01767 
01768   /* grid dimension */
01769   p3m.local_mesh.size=1; 
01770   for(i=0;i<3;i++) {p3m.local_mesh.dim[i] = ind[i] - p3m.local_mesh.ld_ind[i] + 1; p3m.local_mesh.size*=p3m.local_mesh.dim[i];}
01771   /* reduce inner grid indices from global to local */
01772   for(i=0;i<3;i++) p3m.local_mesh.in_ld[i] = p3m.local_mesh.margin[i*2];
01773   for(i=0;i<3;i++) p3m.local_mesh.in_ur[i] = p3m.local_mesh.margin[i*2]+p3m.local_mesh.inner[i];
01774 
01775   p3m.local_mesh.q_2_off  = p3m.local_mesh.dim[2] - p3m.params.cao;
01776   p3m.local_mesh.q_21_off = p3m.local_mesh.dim[2] * (p3m.local_mesh.dim[1] - p3m.params.cao);
01777  
01778 }
01779 
01780 void p3m_calc_lm_ld_pos() {
01781   int i; 
01782   /* spacial position of left down mesh point */
01783   for(i=0;i<3;i++) {
01784     p3m.local_mesh.ld_pos[i] = (p3m.local_mesh.ld_ind[i]+ p3m.params.mesh_off[i])*p3m.params.a[i];
01785   }
01786 }
01787 
01788 
01789 void p3m_init_a_ai_cao_cut() {
01790   int i;
01791   for(i=0;i<3;i++) {
01792     p3m.params.ai[i]      = (double)p3m.params.mesh[i]/box_l[i]; 
01793     p3m.params.a[i]       = 1.0/p3m.params.ai[i];
01794     p3m.params.cao_cut[i] = 0.5*p3m.params.a[i]*p3m.params.cao;
01795   }
01796 }
01797 
01798 
01799 
01800 
01801 int p3m_sanity_checks_boxl() {
01802   char *errtxt;
01803   int i, ret = 0;
01804   for(i=0;i<3;i++) {
01805     /* check k-space cutoff */
01806     if(p3m.params.cao_cut[i] >= 0.5*box_l[i]) {
01807       errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE);
01808       ERROR_SPRINTF(errtxt,"{039 P3M_init: k-space cutoff %g is larger than half of box dimension %g} ",p3m.params.cao_cut[i],box_l[i]);
01809       ret = 1;
01810     }
01811     if(p3m.params.cao_cut[i] >= local_box_l[i]) {
01812       errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE);
01813       ERROR_SPRINTF(errtxt,"{040 P3M_init: k-space cutoff %g is larger than local box dimension %g} ",p3m.params.cao_cut[i],local_box_l[i]);
01814       ret = 1;
01815     }
01816   }
01817   
01818   
01819   return ret;
01820 }
01821 
01822 
01823 
01824 
01825 int p3m_sanity_checks()
01826 {
01827   char *errtxt;
01828   int ret = 0;
01829 
01830   if (!PERIODIC(0) || !PERIODIC(1) || !PERIODIC(2)) {
01831     errtxt = runtime_error(128);
01832     ERROR_SPRINTF(errtxt, "{041 P3M requires periodicity 1 1 1} ");
01833     ret = 1;
01834   }
01835   
01836   if (cell_structure.type != CELL_STRUCTURE_DOMDEC) {
01837     errtxt = runtime_error(128);
01838     ERROR_SPRINTF(errtxt, "{042 P3M at present requires the domain decomposition cell system} ");
01839     ret = 1;
01840   }
01841 
01842   if (p3m_sanity_checks_boxl()) ret = 1;
01843 
01844   if( p3m.params.mesh[0] == 0) {
01845     errtxt = runtime_error(128);
01846     ERROR_SPRINTF(errtxt,"{045 P3M_init: mesh size is not yet set} ");
01847     ret = 1;
01848   }
01849   if( p3m.params.cao == 0) {
01850     errtxt = runtime_error(128);
01851     ERROR_SPRINTF(errtxt,"{046 P3M_init: cao is not yet set} ");
01852     ret = 1;
01853   }
01854   if (skin == -1) {
01855     errtxt = runtime_error(128);
01856     ERROR_SPRINTF(errtxt,"{047 P3M_init: skin is not yet set} ");
01857     ret = 1;
01858   }
01859   if (p3m.params.alpha < 0.0 ) {
01860     errtxt = runtime_error(128);
01861     ERROR_SPRINTF(errtxt,"{048 P3M_init: alpha must be >0} ");
01862     ret = 1;
01863   }
01864   if(node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) {
01865     errtxt = runtime_error(128);
01866     ERROR_SPRINTF(errtxt,"{048a P3M_init: node grid must be sorted, largest first} ");
01867     ret = 1;
01868   }
01869   
01870   if (p3m.params.epsilon != P3M_EPSILON_METALLIC) {
01871     if( !((p3m.params.mesh[0] == p3m.params.mesh[1]) &&
01872           (p3m.params.mesh[1] == p3m.params.mesh[2]))) {
01873           errtxt = runtime_error(128);
01874           ERROR_SPRINTF(errtxt,"{049 P3M_init: Nonmetallic epsilon requires cubic box} ");
01875           ret = 1;
01876         }
01877   }
01878   
01879 
01880   return ret;
01881 }
01882 
01883 
01884 
01885 
01886 void p3m_calc_send_mesh()
01887 {
01888   int i,j,evenodd;
01889   int done[3]={0,0,0};
01890   MPI_Status status;
01891   /* send grids */
01892   for(i=0;i<3;i++) {
01893     for(j=0;j<3;j++) {
01894       /* left */
01895       p3m.sm.s_ld[i*2][j] = 0 + done[j]*p3m.local_mesh.margin[j*2];
01896       if(j==i) p3m.sm.s_ur[i*2][j] = p3m.local_mesh.margin[j*2]; 
01897       else     p3m.sm.s_ur[i*2][j] = p3m.local_mesh.dim[j]-done[j]*p3m.local_mesh.margin[(j*2)+1];
01898       /* right */
01899       if(j==i) p3m.sm.s_ld[(i*2)+1][j] = p3m.local_mesh.in_ur[j];
01900       else     p3m.sm.s_ld[(i*2)+1][j] = 0 + done[j]*p3m.local_mesh.margin[j*2];
01901       p3m.sm.s_ur[(i*2)+1][j] = p3m.local_mesh.dim[j] - done[j]*p3m.local_mesh.margin[(j*2)+1];
01902     }   
01903     done[i]=1;
01904   }
01905   p3m.sm.max=0;
01906   for(i=0;i<6;i++) {
01907     p3m.sm.s_size[i] = 1;
01908     for(j=0;j<3;j++) {
01909       p3m.sm.s_dim[i][j] = p3m.sm.s_ur[i][j]-p3m.sm.s_ld[i][j];
01910       p3m.sm.s_size[i] *= p3m.sm.s_dim[i][j];
01911     }
01912     if(p3m.sm.s_size[i]>p3m.sm.max) p3m.sm.max=p3m.sm.s_size[i];
01913   }
01914   /* communication */
01915   for(i=0;i<6;i++) {
01916     if(i%2==0) j = i+1;
01917     else       j = i-1;
01918     if(node_neighbors[i] != this_node) {
01919       /* two step communication: first all even positions than all odd */
01920       for(evenodd=0; evenodd<2;evenodd++) {
01921         if((node_pos[i/2]+evenodd)%2==0)
01922           MPI_Send(&(p3m.local_mesh.margin[i]), 1, MPI_INT, 
01923                    node_neighbors[i],REQ_P3M_INIT,comm_cart);
01924         else
01925           MPI_Recv(&(p3m.local_mesh.r_margin[j]), 1, MPI_INT,
01926                    node_neighbors[j],REQ_P3M_INIT,comm_cart,&status);    
01927       }
01928     }
01929     else {
01930       p3m.local_mesh.r_margin[j] = p3m.local_mesh.margin[i];
01931     }
01932   }
01933   /* recv grids */
01934   for(i=0;i<3;i++) 
01935     for(j=0;j<3;j++) {
01936       if(j==i) {
01937         p3m.sm.r_ld[ i*2   ][j] = p3m.sm.s_ld[ i*2   ][j] + p3m.local_mesh.margin[2*j];
01938         p3m.sm.r_ur[ i*2   ][j] = p3m.sm.s_ur[ i*2   ][j] + p3m.local_mesh.r_margin[2*j];
01939         p3m.sm.r_ld[(i*2)+1][j] = p3m.sm.s_ld[(i*2)+1][j] - p3m.local_mesh.r_margin[(2*j)+1];
01940         p3m.sm.r_ur[(i*2)+1][j] = p3m.sm.s_ur[(i*2)+1][j] - p3m.local_mesh.margin[(2*j)+1];
01941       }
01942       else {
01943         p3m.sm.r_ld[ i*2   ][j] = p3m.sm.s_ld[ i*2   ][j];
01944         p3m.sm.r_ur[ i*2   ][j] = p3m.sm.s_ur[ i*2   ][j];
01945         p3m.sm.r_ld[(i*2)+1][j] = p3m.sm.s_ld[(i*2)+1][j];
01946         p3m.sm.r_ur[(i*2)+1][j] = p3m.sm.s_ur[(i*2)+1][j];
01947       }
01948     }
01949   for(i=0;i<6;i++) {
01950     p3m.sm.r_size[i] = 1;
01951     for(j=0;j<3;j++) {
01952       p3m.sm.r_dim[i][j] = p3m.sm.r_ur[i][j]-p3m.sm.r_ld[i][j];
01953       p3m.sm.r_size[i] *= p3m.sm.r_dim[i][j];
01954     }
01955     if(p3m.sm.r_size[i]>p3m.sm.max) p3m.sm.max=p3m.sm.r_size[i];
01956   }
01957   
01958   
01959 }
01960 
01961 
01962 
01963 /************************************************/
01964 
01965 void p3m_scaleby_box_l() {
01966   if (coulomb.bjerrum == 0.0) {
01967     return;
01968   }
01969 
01970   p3m.params.r_cut = p3m.params.r_cut_iL* box_l[0];
01971   p3m.params.alpha = p3m.params.alpha_L * box_l_i[0];
01972   p3m_init_a_ai_cao_cut();
01973   p3m_calc_lm_ld_pos();
01974   p3m_sanity_checks_boxl(); 
01975   p3m_calc_influence_function_force();
01976   p3m_calc_influence_function_energy();  
01977 }
01978 
01979 /************************************************/
01980 
01981 void p3m_calc_kspace_stress (double* stress) {
01982     if (p3m.sum_q2 > 0) {
01983         double* node_k_space_stress;
01984         double* k_space_stress;
01985         double force_prefac, node_k_space_energy, sqk, vterm, kx, ky, kz;
01986         int jx, jy, jz, i, ind = 0;
01987         // ordering after fourier transform
01988         const int x = 2, y = 0, z = 1;
01989         node_k_space_stress = malloc(9*sizeof(double));
01990         k_space_stress = malloc(9*sizeof(double));
01991 
01992         for (i = 0; i < 9; i++) {
01993             node_k_space_stress[i] = 0.0;
01994             k_space_stress[i] = 0.0;
01995         }
01996 
01997         p3m_gather_fft_grid(p3m.rs_mesh);
01998         fft_perform_forw(p3m.rs_mesh);
01999         force_prefac = coulomb.prefactor / (2.0 * box_l[0] * box_l[1] * box_l[2]);
02000 
02001         for(jx=0; jx < fft.plan[3].new_mesh[0]; jx++) {
02002             for(jy=0; jy < fft.plan[3].new_mesh[1]; jy++) {
02003                 for(jz=0; jz < fft.plan[3].new_mesh[2]; jz++) {
02004                        kx = p3m.d_op[2][ jx + fft.plan[3].start[0] ];
02005                        ky = p3m.d_op[0][ jy + fft.plan[3].start[1] ];
02006                        kz = p3m.d_op[1][ jz + fft.plan[3].start[2] ];
02007                     sqk = SQR(kx/box_l[x]) + SQR(ky/box_l[y]) + SQR(kz/box_l[z]);
02008                     if (sqk == 0) {
02009                         node_k_space_energy = 0.0;
02010                         vterm = 0.0;
02011                     }
02012                     else {
02013                         vterm = -2.0 * (1/sqk + SQR(PI/p3m.params.alpha));
02014                         node_k_space_energy = p3m.g_energy[ind] * ( SQR(p3m.rs_mesh[2*ind]) + SQR(p3m.rs_mesh[2*ind + 1]) );
02015                     }
02016                     ind++;
02017 
02018                     node_k_space_stress[0] += node_k_space_energy * (1.0 + vterm*SQR(kx/box_l[x]));     /* sigma_xx */
02019                     node_k_space_stress[1] += node_k_space_energy * (vterm*kx*ky/(box_l[x]*box_l[y]));  /* sigma_xy */
02020                     node_k_space_stress[2] += node_k_space_energy * (vterm*kx*kz/(box_l[x]*box_l[z]));  /* sigma_xz */
02021 
02022                     node_k_space_stress[3] += node_k_space_energy * (vterm*kx*ky/(box_l[x]*box_l[y]));  /* sigma_yx */
02023                     node_k_space_stress[4] += node_k_space_energy * (1.0 + vterm*SQR(ky/box_l[y]));     /* sigma_yy */
02024                     node_k_space_stress[5] += node_k_space_energy * (vterm*ky*kz/(box_l[y]*box_l[z]));  /* sigma_yz */
02025 
02026                     node_k_space_stress[6] += node_k_space_energy * (vterm*kx*kz/(box_l[x]*box_l[z]));  /* sigma_zx */
02027                     node_k_space_stress[7] += node_k_space_energy * (vterm*ky*kz/(box_l[y]*box_l[z]));  /* sigma_zy */
02028                     node_k_space_stress[8] += node_k_space_energy * (1.0 + vterm*SQR(kz/box_l[z]));     /* sigma_zz */
02029                 }
02030             }
02031         }
02032         MPI_Reduce(node_k_space_stress, k_space_stress, 9, MPI_DOUBLE, MPI_SUM, 0, comm_cart);
02033         for (i = 0; i < 9; i++) {
02034             stress[i] += k_space_stress[i] * force_prefac;
02035         }
02036 //         fprintf(stderr, "sxx = %.5e, syy = %.5e, szz = %.5e\n", stress[0], stress[4], stress[8]);
02037 //         fprintf(stderr, "sxy = %.5e, sxz = %.5e, syz = %.5e\n", stress[1], stress[2], stress[5]);
02038         free (node_k_space_stress);
02039         free (k_space_stress);
02040     }
02041 }
02042 
02043 
02044 /************************************************/
02045 
02046 
02047 
02048 
02049 /*********************** miscelanea of functions *************************************/
02050 
02051 /************************************************
02052  * Debug functions printing p3m structures 
02053  ************************************************/
02054 
02055 void p3m_p3m_print_struct(p3m_parameter_struct ps) {
02056   fprintf(stderr,"%d: p3m_parameter_struct: \n",this_node);
02057   fprintf(stderr,"   alpha_L=%f, r_cut_iL=%f \n",
02058           ps.alpha_L,ps.r_cut_iL);
02059   fprintf(stderr,"   mesh=(%d,%d,%d), mesh_off=(%.4f,%.4f,%.4f)\n",
02060           ps.mesh[0],ps.mesh[1],ps.mesh[2],
02061           ps.mesh_off[0],ps.mesh_off[1],ps.mesh_off[2]);
02062   fprintf(stderr,"   cao=%d, inter=%d, epsilon=%f\n",
02063           ps.cao,ps.inter,ps.epsilon);
02064   fprintf(stderr,"   cao_cut=(%f,%f,%f)\n",
02065           ps.cao_cut[0],ps.cao_cut[1],ps.cao_cut[2]);
02066   fprintf(stderr,"   a=(%f,%f,%f), ai=(%f,%f,%f)\n",
02067           ps.a[0],ps.a[1],ps.a[2],ps.ai[0],ps.ai[1],ps.ai[2]);
02068 }
02069 
02070 #endif /* of P3M */
02071