ESPResSo 3.2.0-167-g2c9ead1-git
Extensible Simulation Package for Soft Matter Research
p3m-dipolar.c
Go to the documentation of this file.
00001 /*
00002   Copyright (C) 2010,2011,2012,2013 The ESPResSo project
00003   Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
00004     Max-Planck-Institute for Polymer Research, Theory Group
00005   
00006   This file is part of ESPResSo.
00007   
00008   ESPResSo is free software: you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation, either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   ESPResSo is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
00020 */
00021 /** \file p3m-dipolar.c  P3M algorithm for long range magnetic dipole-dipole interaction.
00022  *
00023  NB: In general the magnetic dipole-dipole functions bear the same
00024      name than the charge-charge but, adding in front of the name a D
00025      and replacing where "charge" appears by "dipole". In this way one
00026      can recognize the similarity of the functions but avoiding nasty
00027      confusions in their use.
00028 
00029  PS: By default the magnetic epsilon is metallic = 0.  
00030 */
00031 
00032 #include "p3m-dipolar.h"
00033 
00034 #include <mpi.h>
00035 #include <stdio.h>
00036 #include <stdlib.h>
00037 #include <string.h>
00038 
00039 #include "utils.h"
00040 #include "integrate.h"
00041 #include "global.h"
00042 #include "grid.h"
00043 #include "domain_decomposition.h"
00044 #include "particle_data.h"
00045 #include "communication.h"
00046 #include "fft-dipolar.h"
00047 #include "thermostat.h"
00048 #include "cells.h"
00049 #include "tuning.h"
00050 
00051 #ifdef DP3M
00052 
00053 /************************************************
00054  * DEFINES
00055  ************************************************/
00056 
00057 /* MPI tags for the charge-charge p3m communications: */
00058 /** Tag for communication in P3M_init() -> send_calc_mesh(). */
00059 #define REQ_P3M_INIT_D   2001
00060 /** Tag for communication in p3m_gather_fft_grid(). */
00061 #define REQ_P3M_GATHER_D 2011
00062 /** Tag for communication in p3m_spread_force_grid(). */
00063 #define REQ_P3M_SPREAD_D 2021
00064 
00065 /************************************************
00066  * variables
00067  ************************************************/
00068 
00069 dp3m_data_struct dp3m;
00070 
00071 /** \name Private Functions */
00072 /************************************************************/
00073 /*@{*/
00074 
00075 
00076 /** Calculates for magnetic dipoles the properties of the send/recv sub-meshes of the local FFT mesh. 
00077  *  In order to calculate the recv sub-meshes there is a communication of 
00078  *  the margins between neighbouring nodes. */ 
00079 static void dp3m_calc_send_mesh();
00080 
00081 /** Initializes for magnetic dipoles the (inverse) mesh constant \ref
00082     p3m_parameter_struct::a (\ref p3m_parameter_struct::ai) and the cutoff for charge
00083     assignment \ref p3m_parameter_struct::cao_cut, which has to be done by \ref
00084     dp3m_init once and by \ref dp3m_scaleby_box_l
00085     whenever the \ref box_l changed.  */
00086 static void dp3m_init_a_ai_cao_cut();
00087 
00088 
00089 /** Calculate for magnetic dipoles the spacial position of the left down mesh point of the local mesh, to be
00090     stored in \ref p3m_local_mesh::ld_pos; function called by \ref dp3m_calc_local_ca_mesh once
00091     and by \ref dp3m_scaleby_box_l whenever the \ref box_l changed. */
00092 static void dp3m_calc_lm_ld_pos();
00093 
00094 
00095 /** Gather FFT grid.
00096  *  After the charge assignment Each node needs to gather the
00097  *  information for the FFT grid in his spatial domain.
00098  */
00099 static void dp3m_gather_fft_grid(double* mesh);
00100 
00101 /** Spread force grid.
00102  *  After the k-space calculations each node needs to get all force
00103  *  information to reassigne the forces from the grid to the
00104  *  particles.
00105  */
00106 static void dp3m_spread_force_grid(double* mesh);
00107 
00108 /** realloc charge assignment fields. */
00109 static void dp3m_realloc_ca_fields(int newsize);
00110 
00111 
00112 /** Initializes the (inverse) mesh constant \ref p3m_parameter_struct::a (\ref
00113     p3m_parameter_struct::ai) and the cutoff for charge assignment \ref
00114     p3m_parameter_struct::cao_cut, which has to be done by \ref dp3m_init
00115     once and by \ref dp3m_scaleby_box_l whenever the \ref box_l
00116     changed.  */
00117 static void dp3m_init_a_ai_cao_cut();
00118 
00119 
00120 /** checks for correctness for magnetic dipoles in P3M of the cao_cut, necessary when the box length changes */
00121 static int dp3m_sanity_checks_boxl(void);
00122 
00123 
00124 /** Calculate the spacial position of the left down mesh point of the local mesh, to be
00125     stored in \ref p3m_local_mesh::ld_pos; function called by \ref dp3m_calc_local_ca_mesh once
00126     and by \ref dp3m_scaleby_box_l whenever the \ref box_l changed. */
00127 static void dp3m_calc_lm_ld_pos();
00128 
00129 
00130 /** Calculates properties of the local FFT mesh for the 
00131     charge assignment process. */
00132 static void dp3m_calc_local_ca_mesh();
00133 
00134 /** Interpolates the P-th order charge assignment function from
00135  * Hockney/Eastwood 5-189 (or 8-61). The following charge fractions
00136  * are also tabulated in Deserno/Holm. */
00137 static void dp3m_interpolate_dipole_assignment_function();
00138 
00139 /** shifts the mesh points by mesh/2 */
00140 static void dp3m_calc_meshift();
00141 
00142 /** Calculates the Fourier transformed differential operator.  
00143  *  Remark: This is done on the level of n-vectors and not k-vectors,
00144  *           i.e. the prefactor i*2*PI/L is missing! */
00145 static void dp3m_calc_differential_operator();
00146 
00147 /** Calculates the influence function optimized for the dipolar forces. */
00148 static void dp3m_calc_influence_function_force();
00149 
00150 /** Calculates the influence function optimized for the dipolar energy and torques. */
00151 static void dp3m_calc_influence_function_energy();
00152 
00153 /** Calculates the constants necessary to correct the dipolar energy to minimize the error. */
00154 static void dp3m_compute_constants_energy_dipolar();
00155  
00156 /** Calculates the aliasing sums for the optimal influence function.
00157  *
00158  * Calculates the aliasing sums in the nominator and denominator of
00159  * the expression for the optimal influence function (see
00160  * Hockney/Eastwood: 8-22, p. 275).  
00161  *
00162  * \param  n           n-vector for which the aliasing sum is to be performed.
00163  * \param  nominator   aliasing sums in the nominator.
00164  * \return denominator aliasing sum in the denominator
00165  */
00166 static double dp3m_perform_aliasing_sums_force(int n[3], double nominator[1]);
00167 static double dp3m_perform_aliasing_sums_energy(int n[3], double nominator[1]);
00168 
00169 static double dp3m_k_space_error(double box_size, double prefac, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L);
00170 /*@}*/
00171 
00172 
00173 /* Compute the dipolar surface terms */
00174 static double calc_surface_term(int force_flag, int energy_flag);
00175 
00176 
00177 /** \name P3M Tuning Functions (private)*/
00178 /************************************************************/
00179 /*@{*/
00180 
00181 
00182 // These 3 functions are to tune the P3M code in the case of dipolar interactions
00183 
00184 double P3M_DIPOLAR_real_space_error(double box_size, double prefac, double r_cut_iL,
00185                             int n_c_part, double sum_q2, double alpha_L);
00186 static void dp3m_tune_aliasing_sums(int nx, int ny, int nz, 
00187                             int mesh, double mesh_i, int cao, double alpha_L_i, 
00188                             double *alias1, double *alias2)     ;                
00189 
00190 // To compute the value of alpha  through a bibisection method from the formula 33 of JCP115,6351,(2001).
00191 double dp3m_rtbisection(double box_size, double prefac, double r_cut_iL, int n_c_part, double sum_q2,  double x1, double x2, double xacc, double tuned_accuracy);
00192 
00193 /*@}*/
00194 
00195 /************************************************************/
00196 /* functions related to the correction of the dipolar p3m-energy */
00197 
00198 static double dp3m_average_dipolar_self_energy(double box_l, int mesh);
00199 static double dp3m_perform_aliasing_sums_dipolar_self_energy(int n[3]);
00200 
00201 /************************************************************/
00202 /* functions related to the correction of the dipolar p3m-energy */
00203 
00204 /*
00205 
00206 // Do the sum over k<>0 where k={kx,ky,kz} with kx,ky,kz INTEGERS, of
00207 // exp(-PI**2*k**2/alpha**2/L**2)
00208 static double dp3m_sumi1(double alpha_L){
00209        int k2,kx,ky,kz,kx2,ky2,limit=60;
00210        double suma,alpha_L2;
00211        
00212        alpha_L2= alpha_L* alpha_L;
00213        
00214        //fprintf(stderr,"alpha_L=%le\n",alpha_L); 
00215        //fprintf(stderr,"PI=%le\n",PI); 
00216        
00217        
00218        suma=0.0;
00219        for(kx=-limit;kx<=limit;kx++){
00220          kx2=kx*kx;
00221        for(ky=-limit;ky<=limit;ky++){
00222          ky2=ky*ky;
00223        for(kz=-limit;kz<=limit;kz++){
00224            k2=kx2+ky2+kz*kz;
00225            suma+=exp(-PI*PI*k2/(alpha_L*alpha_L));
00226        }}} 
00227        suma-=1; //It's easier to substract the term k=0 later than put an if inside the loops
00228        
00229        
00230          //fprintf(stderr,"suma=%le\n",suma); 
00231      
00232        
00233    return suma;
00234 }
00235 
00236 */
00237 
00238 /************************************************************/
00239 
00240 /* 
00241 // Do the sum over n<>0 where n={nx*L,ny*L,nz*L} with nx,ny,nz INTEGERS, of
00242 // exp(-alpha_iL**2*n**2)
00243 static double dp3m_sumi2(double alpha_L){
00244        int n2,nx,ny,nz,nx2,ny2,limit=60;
00245        double suma;
00246        
00247  
00248        
00249        suma=0.0;
00250        for(nx=-limit;nx<=limit;nx++){
00251          nx2=nx*nx;
00252        for(ny=-limit;ny<=limit;ny++){
00253          ny2=ny*ny;
00254        for(nz=-limit;nz<=limit;nz++){
00255            n2=nx2+ny2+nz*nz;
00256            suma+=exp(-alpha_L*alpha_L*n2);
00257        }}} 
00258        suma-=1; //It's easier to substract the term n=0 later than put an if inside the loops
00259        
00260        
00261        
00262    return suma;
00263 }
00264 
00265 */
00266 
00267 void dp3m_pre_init(void) {
00268   p3m_common_parameter_pre_init(&dp3m.params);
00269   dp3m.params.epsilon = P3M_EPSILON_MAGNETIC;
00270 
00271   /* dp3m.local_mesh is uninitialized */
00272   /* dp3m.sm is uninitialized */
00273   dp3m.rs_mesh = NULL;
00274   dp3m.rs_mesh_dip[0] = NULL;
00275   dp3m.rs_mesh_dip[1] = NULL;
00276   dp3m.rs_mesh_dip[2] = NULL;
00277   dp3m.ks_mesh = NULL;
00278 
00279   dp3m.sum_dip_part = 0;
00280   dp3m.sum_mu2 = 0.0;
00281 
00282   for (int i = 0; i < 7; i++)
00283     dp3m.int_caf[i] = NULL;
00284   dp3m.pos_shift = 0.0;
00285   dp3m.meshift = NULL;
00286 
00287   dp3m.d_op = NULL;
00288   dp3m.g_force = NULL;
00289   dp3m.g_energy = NULL;
00290 
00291   dp3m.ca_num = 0;
00292   dp3m.ca_frac = NULL;
00293   dp3m.ca_fmp = NULL;
00294   dp3m.ks_pnum = 0;
00295 
00296   dp3m.send_grid = NULL;
00297   dp3m.recv_grid = NULL;
00298   
00299   dp3m.energy_correction = 0.0;
00300   
00301   dfft_pre_init();
00302 }
00303 
00304 void dp3m_set_bjerrum() {
00305   dp3m.params.alpha    = 0.0;
00306   dp3m.params.alpha_L  = 0.0;
00307   dp3m.params.r_cut    = 0.0;
00308   dp3m.params.r_cut_iL = 0.0;
00309   dp3m.params.mesh[0]  = 0;
00310   dp3m.params.mesh[1]  = 0;
00311   dp3m.params.mesh[2]  = 0;
00312   dp3m.params.cao      = 0;
00313 }
00314 
00315 
00316 void dp3m_init() {
00317   int n;
00318 
00319   if (coulomb.Dbjerrum == 0.0) {       
00320        if(coulomb.Dbjerrum == 0.0) {
00321            dp3m.params.r_cut    = 0.0;
00322            dp3m.params.r_cut_iL = 0.0;
00323           if(this_node==0) 
00324              P3M_TRACE(fprintf(stderr,"0: dp3m_init: dipolar Bjerrum length is zero.\n");
00325            fprintf(stderr,"   Magnetostatics of dipoles switched off!\n"));
00326       }
00327   } else {  
00328     P3M_TRACE(fprintf(stderr,"%d: dp3m_init: \n",this_node));
00329 
00330     if (dp3m_sanity_checks()) return;
00331 
00332     P3M_TRACE(fprintf(stderr,"%d: dp3m_init: starting\n",this_node));
00333 
00334         P3M_TRACE(fprintf(stderr,"%d: mesh=%d, cao=%d, mesh_off=(%f,%f,%f)\n",this_node,dp3m.params.mesh[0],dp3m.params.cao,dp3m.params.mesh_off[0],dp3m.params.mesh_off[1],dp3m.params.mesh_off[2]));
00335         dp3m.params.cao3 = dp3m.params.cao*dp3m.params.cao*dp3m.params.cao;
00336 
00337 
00338     /* initializes the (inverse) mesh constant dp3m.params.a (dp3m.params.ai) and the cutoff for charge assignment dp3m.params.cao_cut */
00339     dp3m_init_a_ai_cao_cut();
00340 
00341     /* initialize ca fields to size CA_INCREMENT: dp3m.ca_frac and dp3m.ca_fmp */
00342     dp3m.ca_num = 0;
00343     if(dp3m.ca_num < CA_INCREMENT) {
00344       dp3m.ca_num = 0;
00345       dp3m_realloc_ca_fields(CA_INCREMENT);
00346     }
00347  
00348     dp3m_calc_local_ca_mesh();
00349 
00350     dp3m_calc_send_mesh();
00351     P3M_TRACE(p3m_p3m_print_local_mesh(dp3m.local_mesh));
00352     
00353     /* DEBUG */
00354     for(n=0;n<n_nodes;n++) {
00355       /* MPI_Barrier(comm_cart); */
00356          if(n==this_node) P3M_TRACE(p3m_p3m_print_send_mesh(dp3m.sm));
00357     }
00358     
00359     dp3m.send_grid = (double *) realloc(dp3m.send_grid, sizeof(double)*dp3m.sm.max);
00360     dp3m.recv_grid = (double *) realloc(dp3m.recv_grid, sizeof(double)*dp3m.sm.max);
00361 
00362     /* fix box length dependent constants */
00363     dp3m_scaleby_box_l();
00364     
00365     if (dp3m.params.inter > 0) dp3m_interpolate_dipole_assignment_function();
00366 
00367     dp3m.pos_shift = (double)((dp3m.params.cao-1)/2) - (dp3m.params.cao%2)/2.0;
00368     P3M_TRACE(fprintf(stderr,"%d: dipolar pos_shift = %f\n",this_node,dp3m.pos_shift)); 
00369  
00370     /* FFT */
00371     P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh ADR=%p\n",this_node,dp3m.rs_mesh));
00372  
00373     int ca_mesh_size = dfft_init(&dp3m.rs_mesh,
00374                                  dp3m.local_mesh.dim,dp3m.local_mesh.margin,
00375                                  dp3m.params.mesh, dp3m.params.mesh_off,
00376                                  &dp3m.ks_pnum);
00377     dp3m.ks_mesh = (double *) realloc(dp3m.ks_mesh, ca_mesh_size*sizeof(double));
00378     
00379     for (n=0;n<3;n++)   
00380        dp3m.rs_mesh_dip[n] = (double *) realloc(dp3m.rs_mesh_dip[n], ca_mesh_size*sizeof(double));
00381 
00382      P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh_dip[0] ADR=%p\n",this_node,dp3m.rs_mesh_dip[0]));
00383      P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh_dip[1] ADR=%p\n",this_node,dp3m.rs_mesh_dip[1]));
00384      P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh_dip[2] ADR=%p\n",this_node,dp3m.rs_mesh_dip[2]));
00385  
00386  
00387     /* k-space part: */
00388     
00389     dp3m_calc_differential_operator();
00390 
00391     dp3m_calc_influence_function_force();
00392     dp3m_calc_influence_function_energy();
00393 
00394     dp3m_count_magnetic_particles();
00395 
00396     P3M_TRACE(fprintf(stderr,"%d: p3m initialized\n",this_node));
00397   }
00398 }
00399 
00400 void dp3m_free_dipoles() {
00401   for (int i=0;i<3;i++) free(dp3m.rs_mesh_dip[i]);
00402   free(dp3m.ca_frac);
00403   free(dp3m.ca_fmp);
00404   free(dp3m.send_grid);
00405   free(dp3m.recv_grid);
00406   free(dp3m.rs_mesh);
00407   free(dp3m.ks_mesh); 
00408 }
00409 
00410 double dp3m_average_dipolar_self_energy(double box_l, int mesh) {
00411   int   i,ind,n[3];
00412   double node_phi = 0.0, phi = 0.0;
00413   double U2;
00414         
00415   int end[3];
00416   int size=1;
00417         
00418   for(i=0;i<3;i++) {
00419     size *= dfft.plan[3].new_mesh[i];
00420     end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i];
00421   }
00422   
00423   for(n[0]=dfft.plan[3].start[0]; n[0]<end[0]; n[0]++){
00424     for(n[1]=dfft.plan[3].start[1]; n[1]<end[1]; n[1]++){
00425       for(n[2]=dfft.plan[3].start[2]; n[2]<end[2]; n[2]++) {
00426         ind = (n[2]-dfft.plan[3].start[2]) + dfft.plan[3].new_mesh[2] *
00427         ((n[1]-dfft.plan[3].start[1]) + (dfft.plan[3].new_mesh[1]*(n[0]-dfft.plan[3].start[0])));
00428 
00429         if( (n[0]==0) && (n[1]==0) && (n[2]==0) )
00430          node_phi += 0.0;
00431         else if( (n[0]%(dp3m.params.mesh[0]/2)==0) &&
00432                  (n[1]%(dp3m.params.mesh[0]/2)==0) &&
00433                  (n[2]%(dp3m.params.mesh[0]/2)==0) )
00434           node_phi += 0.0;
00435         else {
00436                   U2 = dp3m_perform_aliasing_sums_dipolar_self_energy(n);
00437                   node_phi += dp3m.g_energy[ind] * U2*(SQR(dp3m.d_op[n[0]])+SQR(dp3m.d_op[n[1]])+SQR(dp3m.d_op[n[2]]));
00438         }
00439       }}}
00440   
00441       
00442   MPI_Reduce(&node_phi, &phi, 1, MPI_DOUBLE, MPI_SUM, 0, comm_cart);   
00443      
00444   phi*=PI/3./box_l/pow(mesh,3);
00445      
00446 
00447   return phi ;
00448 }
00449 
00450 
00451 
00452 double dp3m_perform_aliasing_sums_dipolar_self_energy(int n[3])
00453 {
00454   double u_sum = 0.0;
00455   /* lots of temporary variables... */
00456   double f1,sx,sy,sz,mx,my,mz,nmx,nmy,nmz;
00457   int    limit=P3M_BRILLOUIN+5;
00458 
00459   f1 = 1.0/(double)dp3m.params.mesh[0];
00460 
00461   for(mx = -limit; mx <=limit; mx++) {
00462     nmx = dp3m.meshift[n[0]] + dp3m.params.mesh[0]*mx;
00463     sx  = pow(sinc(f1*nmx),2.0*dp3m.params.cao);
00464     for(my = -limit; my <= limit; my++) {
00465       nmy = dp3m.meshift[n[1]] + dp3m.params.mesh[0]*my;
00466       sy  = sx*pow(sinc(f1*nmy),2.0*dp3m.params.cao);
00467       for(mz = -limit; mz <=limit; mz++) {
00468         nmz = dp3m.meshift[n[2]] + dp3m.params.mesh[0]*mz;
00469         sz  = sy*pow(sinc(f1*nmz),2.0*dp3m.params.cao);
00470         u_sum += sz;
00471       }
00472     }
00473   }
00474   return u_sum;
00475 }
00476 
00477 
00478 
00479 
00480 /******************  functions related to the parsing&tuning  of the dipolar parameters **********/
00481                          
00482 
00483 void dp3m_set_tune_params(double r_cut, int mesh, int cao,
00484                          double alpha, double accuracy, int n_interpol)
00485 {
00486   if (r_cut >= 0) {
00487     dp3m.params.r_cut    = r_cut;
00488     dp3m.params.r_cut_iL = r_cut*box_l_i[0];
00489   }
00490 
00491   if (mesh >= 0)
00492     dp3m.params.mesh[2] = dp3m.params.mesh[1] = dp3m.params.mesh[0] = mesh;
00493 
00494   if (cao >= 0)
00495     dp3m.params.cao = cao;
00496 
00497   if (alpha >= 0) {
00498     dp3m.params.alpha   = alpha;
00499     dp3m.params.alpha_L = alpha*box_l[0];
00500   }
00501 
00502   if (accuracy >= 0)
00503     dp3m.params.accuracy = accuracy;
00504 
00505   if (n_interpol != -1)
00506     dp3m.params.inter = n_interpol;
00507 
00508   coulomb.Dprefactor = (temperature > 0) ? temperature*coulomb.Dbjerrum : coulomb.Dbjerrum;
00509 
00510 }
00511 
00512 
00513 /*****************************************************************************/
00514 
00515 int dp3m_set_params(double r_cut, int mesh, int cao,
00516                     double alpha, double accuracy)
00517 {
00518   if (coulomb.Dmethod != DIPOLAR_P3M && coulomb.Dmethod != DIPOLAR_MDLC_P3M)
00519     coulomb.Dmethod = DIPOLAR_P3M;
00520     
00521   if(r_cut < 0)
00522     return -1;
00523 
00524   if(mesh < 0)
00525     return -2;
00526 
00527   if(cao < 1 || cao > 7 || cao > mesh)
00528     return -3;
00529 
00530   dp3m.params.r_cut    = r_cut;
00531   dp3m.params.r_cut_iL = r_cut*box_l_i[0];
00532   dp3m.params.mesh[2]  = dp3m.params.mesh[1] = dp3m.params.mesh[0] = mesh;
00533   dp3m.params.cao      = cao;
00534 
00535   if (alpha > 0) {
00536     dp3m.params.alpha   = alpha;
00537     dp3m.params.alpha_L = alpha*box_l[0];
00538   }
00539   else
00540     if (alpha != -1.0)
00541       return -4;
00542 
00543   if (accuracy >= 0)
00544     dp3m.params.accuracy = accuracy;
00545   else
00546     if (accuracy != -1.0)
00547       return -5;
00548 
00549   mpi_bcast_coulomb_params();
00550 
00551   return 0;
00552 }
00553 
00554 
00555 int dp3m_set_mesh_offset(double x, double y, double z)
00556 {
00557   if(x < 0.0 || x > 1.0 ||
00558      y < 0.0 || y > 1.0 ||
00559      z < 0.0 || z > 1.0 )
00560     return ES_ERROR;
00561 
00562   dp3m.params.mesh_off[0] = x;
00563   dp3m.params.mesh_off[1] = y;
00564   dp3m.params.mesh_off[2] = z;
00565 
00566   mpi_bcast_coulomb_params();
00567 
00568   return ES_OK;
00569 }
00570 
00571 /* We left the handling of the epsilon, due to portability reasons in
00572 the future for the electrical dipoles, or if people wants to do
00573 electrical dipoles alone using the magnetic code .. */
00574 
00575 int dp3m_set_eps(double eps)
00576 {
00577   dp3m.params.epsilon = eps;
00578 
00579   fprintf(stderr,">> dp3m.params.epsilon =%lf\n",dp3m.params.epsilon);
00580   fprintf(stderr,"if you are doing true MAGNETIC CALCULATIONS the value of Depsilon should be 1, if you change it, you go on your own risk ...\n");
00581 
00582   mpi_bcast_coulomb_params();
00583 
00584   return ES_OK;
00585 }
00586 
00587 
00588 int dp3m_set_ninterpol(int n)
00589 {
00590   if (n < 0)
00591     return ES_ERROR;
00592 
00593   dp3m.params.inter = n;
00594 
00595   mpi_bcast_coulomb_params();
00596 
00597   return ES_OK;
00598 }
00599 
00600 /*****************************************************************************/
00601 
00602 
00603 void dp3m_interpolate_dipole_assignment_function()
00604 {
00605   double dInterpol = 0.5 / (double)dp3m.params.inter;
00606   int i;
00607   long j;
00608 
00609       dInterpol = 0.5 / (double)dp3m.params.inter;  
00610     if (dp3m.params.inter == 0) return;
00611 
00612         P3M_TRACE(fprintf(stderr,"dipolar %d - interpolating (%d) the order-%d charge assignment function\n",
00613                        this_node,dp3m.params.inter,dp3m.params.cao));
00614 
00615          dp3m.params.inter2 = 2*dp3m.params.inter + 1;
00616 
00617           for (i=0; i < dp3m.params.cao; i++) {
00618              /* allocate memory for interpolation array */
00619              dp3m.int_caf[i] = (double *) realloc(dp3m.int_caf[i], sizeof(double)*(2*dp3m.params.inter+1));
00620 
00621             /* loop over all interpolation points */
00622               for (j=-dp3m.params.inter; j<=dp3m.params.inter; j++)
00623                     dp3m.int_caf[i][j+dp3m.params.inter] = p3m_caf(i, j*dInterpol,dp3m.params.cao);
00624          }
00625 }
00626 
00627 /* assign the dipoles */
00628 void dp3m_dipole_assign(void)
00629 {
00630   Cell *cell;
00631   Particle *p;
00632   int i,c,np,j;
00633   /* magnetic particle counter, dipole fraction counter */
00634   int cp_cnt=0;
00635   
00636   
00637   /* prepare local FFT mesh */
00638     for(i=0;i<3;i++)
00639       for(j=0; j<dp3m.local_mesh.size; j++) dp3m.rs_mesh_dip[i][j] = 0.0;
00640 
00641   for (c = 0; c < local_cells.n; c++) {
00642     cell = local_cells.cell[c];
00643     p  = cell->part;
00644     np = cell->n;
00645     for(i = 0; i < np; i++) {
00646       if( p[i].p.dipm != 0.0) {
00647         dp3m_assign_dipole( p[i].r.p,p[i].p.dipm, p[i].r.dip,cp_cnt);
00648         cp_cnt++;
00649       }
00650     }
00651    } 
00652    dp3m_shrink_wrap_dipole_grid(cp_cnt);
00653 
00654 }
00655 
00656 
00657 void dp3m_assign_dipole(double real_pos[3],double mu, double dip[3],int cp_cnt)
00658 {
00659   /* we do not really want to export these, but this function should be inlined */
00660   double p3m_caf(int i, double x, int cao_value);
00661   void dp3m_realloc_ca_fields(int size);
00662 
00663   int d, i0, i1, i2;
00664   double tmp0, tmp1;
00665   /* position of a particle in local mesh units */
00666   double pos;
00667   /* 1d-index of nearest mesh point */
00668   int nmp;
00669   /* distance to nearest mesh point */
00670   double dist[3];
00671   /* index for caf interpolation grid */
00672   int arg[3];
00673   /* index, index jumps for dp3m.rs_mesh array */
00674   int q_ind = 0;
00675   double cur_ca_frac_val, *cur_ca_frac;
00676 
00677   // make sure we have enough space
00678   if (cp_cnt >= dp3m.ca_num) dp3m_realloc_ca_fields(cp_cnt + 1);
00679   // do it here, since p3m_realloc_ca_fields may change the address of dp3m.ca_frac
00680   cur_ca_frac = dp3m.ca_frac + dp3m.params.cao3*cp_cnt;
00681 
00682   if (dp3m.params.inter == 0) {
00683     for(d=0;d<3;d++) {
00684       /* particle position in mesh coordinates */
00685       pos    = ((real_pos[d]-dp3m.local_mesh.ld_pos[d])*dp3m.params.ai[d]) - dp3m.pos_shift;
00686       /* nearest mesh point */
00687       nmp  = (int)pos;
00688       /* distance to nearest mesh point */
00689       dist[d] = (pos-nmp)-0.5;
00690       /* 3d-array index of nearest mesh point */
00691       q_ind = (d == 0) ? nmp : nmp + dp3m.local_mesh.dim[d]*q_ind;
00692 
00693 #ifdef ADDITIONAL_CHECKS
00694       if( pos < -skin*dp3m.params.ai[d] ) {
00695         fprintf(stderr,"%d: dipolar dp3m.rs_mesh underflow! (pos %f)\n", this_node, real_pos[d]);
00696         fprintf(stderr,"%d: allowed coordinates: %f - %f\n",
00697                 this_node,my_left[d] - skin, my_right[d] + skin);           
00698       }
00699       if( (nmp + dp3m.params.cao) > dp3m.local_mesh.dim[d] ) {
00700         fprintf(stderr,"%d: dipolar dp3m.rs_mesh overflow! (pos %f, nmp=%d)\n", this_node, real_pos[d],nmp);
00701         fprintf(stderr,"%d: allowed coordinates: %f - %f\n",
00702                 this_node, my_left[d] - skin, my_right[d] + skin);
00703       }
00704 #endif
00705     }
00706     if (cp_cnt >= 0) dp3m.ca_fmp[cp_cnt] = q_ind;
00707     
00708     for(i0=0; i0<dp3m.params.cao; i0++) {
00709       tmp0 = p3m_caf(i0, dist[0], dp3m.params.cao);
00710       for(i1=0; i1<dp3m.params.cao; i1++) {
00711         tmp1 = tmp0 * p3m_caf(i1, dist[1],dp3m.params.cao);
00712         for(i2=0; i2<dp3m.params.cao; i2++) {
00713           cur_ca_frac_val = tmp1 * p3m_caf(i2, dist[2],dp3m.params.cao);
00714           if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val;
00715           if (mu != 0.0) {
00716             dp3m.rs_mesh_dip[0][q_ind] += dip[0] * cur_ca_frac_val;
00717             dp3m.rs_mesh_dip[1][q_ind] += dip[1] * cur_ca_frac_val;
00718             dp3m.rs_mesh_dip[2][q_ind] += dip[2] * cur_ca_frac_val;
00719           }
00720           q_ind++;
00721         }
00722         q_ind += dp3m.local_mesh.q_2_off;
00723       }
00724       q_ind += dp3m.local_mesh.q_21_off;
00725     }
00726   }
00727   else {
00728     /* particle position in mesh coordinates */
00729     for(d=0;d<3;d++) {
00730       pos    = ((real_pos[d]-dp3m.local_mesh.ld_pos[d])*dp3m.params.ai[d]) - dp3m.pos_shift;
00731       nmp    = (int) pos;
00732       arg[d] = (int) ((pos - nmp)*dp3m.params.inter2);
00733       /* for the first dimension, q_ind is always zero, so this shifts correctly */
00734       q_ind = nmp + dp3m.local_mesh.dim[d]*q_ind;
00735 
00736 #ifdef ADDITIONAL_CHECKS
00737       if( pos < -skin*dp3m.params.ai[d] ) {
00738         fprintf(stderr,"%d: dipolar dp3m.rs_mesh underflow! (pos %f)\n", this_node, real_pos[d]);
00739         fprintf(stderr,"%d: allowed coordinates: %f - %f\n",
00740                 this_node,my_left[d] - skin, my_right[d] + skin);           
00741       }
00742       if( (nmp + dp3m.params.cao) > dp3m.local_mesh.dim[d] ) {
00743         fprintf(stderr,"%d: dipolar dp3m.rs_mesh overflow! (pos %f, nmp=%d)\n", this_node, real_pos[d],nmp);
00744         fprintf(stderr,"%d: allowed coordinates: %f - %f\n",
00745                 this_node, my_left[d] - skin, my_right[d] + skin);
00746       }
00747 #endif
00748     }
00749     if (cp_cnt >= 0) dp3m.ca_fmp[cp_cnt] = q_ind;
00750 
00751     for(i0=0; i0<dp3m.params.cao; i0++) {
00752       tmp0 = dp3m.int_caf[i0][arg[0]];
00753       for(i1=0; i1<dp3m.params.cao; i1++) {
00754         tmp1 = tmp0 * dp3m.int_caf[i1][arg[1]];
00755         for(i2=0; i2<dp3m.params.cao; i2++) {
00756           cur_ca_frac_val = tmp1 * dp3m.int_caf[i2][arg[2]];
00757           if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val;
00758           if (mu != 0.0) {
00759             dp3m.rs_mesh_dip[0][q_ind] += dip[0] * cur_ca_frac_val;
00760             dp3m.rs_mesh_dip[1][q_ind] += dip[1] * cur_ca_frac_val;
00761             dp3m.rs_mesh_dip[2][q_ind] += dip[2] * cur_ca_frac_val;
00762           }
00763           q_ind++;
00764         }
00765         q_ind += dp3m.local_mesh.q_2_off;
00766       }
00767       q_ind += dp3m.local_mesh.q_21_off;
00768     }
00769   }
00770  }
00771 
00772 
00773 /** shrink wrap the dipoles grid */
00774 void dp3m_shrink_wrap_dipole_grid(int n_dipoles) {
00775   if( n_dipoles < dp3m.ca_num ) dp3m_realloc_ca_fields(n_dipoles);
00776 }
00777 
00778 
00779 #ifdef ROTATION
00780 /* assign the torques obtained from k-space */
00781 static void P3M_assign_torques(double prefac, int d_rs)
00782 {
00783   Cell *cell;
00784   Particle *p;
00785   int i,c,np,i0,i1,i2;
00786   /* particle counter, charge fraction counter */
00787   int cp_cnt=0, cf_cnt=0;
00788   /* index, index jumps for dp3m.rs_mesh array */
00789   int q_ind;
00790   int q_m_off = (dp3m.local_mesh.dim[2] - dp3m.params.cao);
00791   int q_s_off = dp3m.local_mesh.dim[2] * (dp3m.local_mesh.dim[1] - dp3m.params.cao);
00792 #ifdef ONEPART_DEBUG
00793   double db_fsum=0 ; /* TODO: db_fsum was missing and code couldn't compile. Now the arbitrary value of 0 is assigned to it, please check.*/ 
00794 #endif
00795 
00796   cp_cnt=0; cf_cnt=0;
00797   for (c = 0; c < local_cells.n; c++) {
00798     cell = local_cells.cell[c];
00799     p  = cell->part;
00800     np = cell->n;
00801     for(i=0; i<np; i++) { 
00802       if( (p[i].p.dipm) != 0.0 ) {
00803         q_ind = dp3m.ca_fmp[cp_cnt];
00804         for(i0=0; i0<dp3m.params.cao; i0++) {
00805           for(i1=0; i1<dp3m.params.cao; i1++) {
00806             for(i2=0; i2<dp3m.params.cao; i2++) {
00807 /*
00808 The following line would fill the torque with the k-space electric field
00809 (without the self-field term) [notice the minus sign!]:           
00810                     p[i].f.torque[d_rs] -= prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];;
00811 Since the torque is the dipole moment cross-product with E, we have:    
00812 */
00813               switch (d_rs) {
00814                 case 0: //E_x
00815                   p[i].f.torque[1] -= p[i].r.dip[2]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];     
00816                   p[i].f.torque[2] += p[i].r.dip[1]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 
00817                   break;
00818                 case 1: //E_y
00819                   p[i].f.torque[0] += p[i].r.dip[2]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];  
00820                   p[i].f.torque[2] -= p[i].r.dip[0]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];  
00821                   break;
00822                 case 2: //E_z
00823                   p[i].f.torque[0] -= p[i].r.dip[1]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];  
00824                   p[i].f.torque[1] += p[i].r.dip[0]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];  
00825               }
00826               q_ind++; 
00827               cf_cnt++;
00828             }
00829             q_ind += q_m_off;
00830           }
00831           q_ind += q_s_off;
00832         }
00833         cp_cnt++;
00834 
00835         ONEPART_TRACE(if(p[i].p.identity==check_id) fprintf(stderr,"%d: OPT: P3M  f = (%.3e,%.3e,%.3e) in dir %d add %.5f\n",this_node,p[i].f.f[0],p[i].f.f[1],p[i].f.f[2],d_rs,-db_fsum));
00836       }
00837     }
00838   }
00839 }
00840 #endif
00841 
00842 
00843 
00844 /* assign the dipolar forces obtained from k-space */
00845 static void dp3m_assign_forces_dip(double prefac, int d_rs)
00846 {
00847   Cell *cell;
00848   Particle *p;
00849 #ifdef ONEPART_DEBUG
00850   double db_fsum=0 ; /* TODO: db_fsum was missing and code couldn't compile. Now the arbitrary value of 0 is assigned to it, please check.*/ 
00851 #endif
00852   int i,c,np,i0,i1,i2;
00853   /* particle counter, charge fraction counter */
00854   int cp_cnt=0, cf_cnt=0;
00855   /* index, index jumps for dp3m.rs_mesh array */
00856   int q_ind;
00857   int q_m_off = (dp3m.local_mesh.dim[2] - dp3m.params.cao);
00858   int q_s_off = dp3m.local_mesh.dim[2] * (dp3m.local_mesh.dim[1] - dp3m.params.cao);
00859 
00860   cp_cnt=0; cf_cnt=0;
00861   for (c = 0; c < local_cells.n; c++) {
00862     cell = local_cells.cell[c];
00863     p  = cell->part;
00864     np = cell->n;
00865     for(i=0; i<np; i++) { 
00866       if( (p[i].p.dipm) != 0.0 ) {
00867         q_ind = dp3m.ca_fmp[cp_cnt];
00868         for(i0=0; i0<dp3m.params.cao; i0++) {
00869           for(i1=0; i1<dp3m.params.cao; i1++) {
00870             for(i2=0; i2<dp3m.params.cao; i2++) {
00871               p[i].f.f[d_rs] += prefac*dp3m.ca_frac[cf_cnt]*
00872                                   ( dp3m.rs_mesh_dip[0][q_ind]*p[i].r.dip[0]
00873                                   +dp3m.rs_mesh_dip[1][q_ind]*p[i].r.dip[1]
00874                                   +dp3m.rs_mesh_dip[2][q_ind]*p[i].r.dip[2]);
00875               q_ind++;
00876               cf_cnt++;
00877             }
00878             q_ind += q_m_off;
00879           }
00880           q_ind += q_s_off;
00881         }
00882         cp_cnt++;
00883 
00884         ONEPART_TRACE(if(p[i].p.identity==check_id) fprintf(stderr,"%d: OPT: P3M  f = (%.3e,%.3e,%.3e) in dir %d add %.5f\n",this_node,p[i].f.f[0],p[i].f.f[1],p[i].f.f[2],d_rs,-db_fsum));
00885       }
00886     }
00887   }
00888 }
00889 
00890 /*****************************************************************************/
00891 
00892 
00893 double dp3m_calc_kspace_forces(int force_flag, int energy_flag) 
00894 {
00895   int i,d,d_rs,ind,j[3];
00896   /**************************************************************/
00897    /* k space energy */
00898   double dipole_prefac;
00899   double surface_term=0.0;
00900   double k_space_energy_dip=0.0, node_k_space_energy_dip=0.0;
00901   double tmp0,tmp1;
00902 
00903   P3M_TRACE(fprintf(stderr,"%d: dipolar p3m_perform(%d,%d): \n",this_node, force_flag, energy_flag));
00904 
00905   dipole_prefac = coulomb.Dprefactor / (double)(dp3m.params.mesh[0]*dp3m.params.mesh[1]*dp3m.params.mesh[2]);
00906  
00907   if (dp3m.sum_mu2 > 0) { 
00908     /* Gather information for FFT grid inside the nodes domain (inner local mesh) */
00909     /* and Perform forward 3D FFT (Charge Assignment Mesh). */
00910     dp3m_gather_fft_grid(dp3m.rs_mesh_dip[0]);
00911     dp3m_gather_fft_grid(dp3m.rs_mesh_dip[1]);
00912     dp3m_gather_fft_grid(dp3m.rs_mesh_dip[2]);
00913     dfft_perform_forw(dp3m.rs_mesh_dip[0]);
00914     dfft_perform_forw(dp3m.rs_mesh_dip[1]);
00915     dfft_perform_forw(dp3m.rs_mesh_dip[2]);
00916     //Note: after these calls, the grids are in the order yzx and not xyz anymore!!!
00917   }
00918   
00919   /* === K Space Calculations === */
00920   P3M_TRACE(fprintf(stderr,"%d: dipolar p3m_perform: k-Space\n",this_node));
00921 
00922   /* === K Space Energy Calculation  === */
00923   if(energy_flag) {
00924 /*********************
00925    Dipolar energy
00926 **********************/
00927   if (dp3m.sum_mu2 > 0) {
00928     P3M_TRACE(fprintf(stderr,"%d: dipolar p3m start Energy calculation: k-Space\n",this_node));
00929     
00930     /* i*k differentiation for dipolar gradients: |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */
00931     ind=0;
00932     i=0;
00933     for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) {
00934       for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) {
00935         for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) {
00936           node_k_space_energy_dip += dp3m.g_energy[i] * (
00937           SQR(dp3m.rs_mesh_dip[0][ind]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+
00938               dp3m.rs_mesh_dip[1][ind]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+
00939               dp3m.rs_mesh_dip[2][ind]*dp3m.d_op[j[1]+dfft.plan[3].start[1]]
00940           ) +
00941           SQR(dp3m.rs_mesh_dip[0][ind+1]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+
00942               dp3m.rs_mesh_dip[1][ind+1]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+
00943               dp3m.rs_mesh_dip[2][ind+1]*dp3m.d_op[j[1]+dfft.plan[3].start[1]]
00944               ));
00945           ind += 2;
00946           i++;
00947         }
00948       }
00949     }
00950     node_k_space_energy_dip *= dipole_prefac * PI / box_l[0];
00951     MPI_Reduce(&node_k_space_energy_dip, &k_space_energy_dip, 1, MPI_DOUBLE, MPI_SUM, 0, comm_cart);
00952    
00953     dp3m_compute_constants_energy_dipolar(); 
00954    
00955     P3M_TRACE(fprintf(stderr,"%d: dp3m.params.epsilon=%lf\n", this_node, dp3m.params.epsilon));
00956    
00957     if(this_node==0) {
00958       /* self energy correction */
00959       P3M_TRACE(fprintf(stderr,"%d: *dp3m.energy_correction=%20.15lf\n",\
00960                         this_node, dp3m.energy_correction));
00961 #ifdef P3M_DEBUG
00962       double a = k_space_energy_dip;
00963 #endif
00964       k_space_energy_dip -= coulomb.Dprefactor*(dp3m.sum_mu2*2*pow(dp3m.params.alpha_L*box_l_i[0],3) * wupii/3.0);
00965 
00966       double volume=box_l[0]*box_l[1]*box_l[2];
00967       k_space_energy_dip += coulomb.Dprefactor*dp3m.energy_correction/volume; /* add the dipolar energy correction due to systematic Madelung-Self effects */  
00968       
00969       P3M_TRACE(fprintf(stderr, "%d: Energy correction: %lf\n", this_node, k_space_energy_dip - a));
00970     }
00971 
00972     P3M_TRACE(fprintf(stderr,"%d: dipolar p3m end Energy calculation: k-Space\n",this_node));
00973 
00974 }
00975 } //if (energy_flag)
00976 
00977   /* === K Space Force Calculation  === */
00978   if(force_flag) {
00979   /***************************        
00980    DIPOLAR TORQUES (k-space)
00981 ****************************/
00982   if (dp3m.sum_mu2 > 0) {
00983  #ifdef ROTATION
00984    P3M_TRACE(fprintf(stderr,"%d: dipolar p3m start torques calculation: k-Space\n",this_node));
00985 
00986     /* fill in ks_mesh array for torque calculation */
00987     ind=0;
00988     i=0;
00989        
00990     for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) {             //j[0]=n_y
00991       for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) {    //j[1]=n_z
00992         for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) {  //j[2]=n_x
00993           //tmp0 = Re(mu)*k,   tmp1 = Im(mu)*k
00994           
00995           tmp0 = dp3m.rs_mesh_dip[0][ind]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+
00996                  dp3m.rs_mesh_dip[1][ind]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+
00997                  dp3m.rs_mesh_dip[2][ind]*dp3m.d_op[j[1]+dfft.plan[3].start[1]];
00998                  
00999           tmp1 = dp3m.rs_mesh_dip[0][ind+1]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+
01000                  dp3m.rs_mesh_dip[1][ind+1]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+
01001                  dp3m.rs_mesh_dip[2][ind+1]*dp3m.d_op[j[1]+dfft.plan[3].start[1]];
01002                  
01003           /* the optimal influence function is the same for torques
01004              and energy */ 
01005              
01006           dp3m.ks_mesh[ind]   = tmp0*dp3m.g_energy[i]; 
01007           dp3m.ks_mesh[ind+1] = tmp1*dp3m.g_energy[i];
01008           ind += 2;
01009           i++;
01010         }
01011       }
01012     }
01013  
01014         
01015     /* Force component loop */
01016     for(d=0;d<3;d++) {
01017       d_rs = (d+dp3m.ks_pnum)%3;
01018       ind=0;
01019       for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) {
01020         for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) {
01021           for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) {
01022             dp3m.rs_mesh[ind] = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind]; ind++;
01023             dp3m.rs_mesh[ind] = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind]; ind++;
01024           }
01025         }
01026       }
01027 
01028 
01029       /* Back FFT force component mesh */
01030       dfft_perform_back(dp3m.rs_mesh);
01031       /* redistribute force component mesh */
01032       dp3m_spread_force_grid(dp3m.rs_mesh);  
01033       /* Assign force component from mesh to particle */
01034       P3M_assign_torques(dipole_prefac*(2*PI/box_l[0]), d_rs);
01035     }
01036     P3M_TRACE(fprintf(stderr, "%d: done torque calculation.\n", this_node));
01037  #endif  /*if def ROTATION */ 
01038     
01039 /***************************
01040    DIPOLAR FORCES (k-space)
01041 ****************************/
01042     P3M_TRACE(fprintf(stderr,"%d: dipolar p3m start forces calculation: k-Space\n",this_node));
01043 
01044 //Compute forces after torques because the algorithm below overwrites the grids dp3m.rs_mesh_dip !
01045 //Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this number to 6 !
01046     /* fill in ks_mesh array for force calculation */
01047     ind=0;
01048     i=0;
01049     for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) {             //j[0]=n_y
01050       for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) {    //j[1]=n_z
01051         for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) {  //j[2]=n_x
01052           //tmp0 = Im(mu)*k,   tmp1 = -Re(mu)*k
01053           tmp0 = dp3m.rs_mesh_dip[0][ind+1]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+
01054                  dp3m.rs_mesh_dip[1][ind+1]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+
01055                  dp3m.rs_mesh_dip[2][ind+1]*dp3m.d_op[j[1]+dfft.plan[3].start[1]];
01056           tmp1 = dp3m.rs_mesh_dip[0][ind]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+
01057                  dp3m.rs_mesh_dip[1][ind]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+
01058                  dp3m.rs_mesh_dip[2][ind]*dp3m.d_op[j[1]+dfft.plan[3].start[1]];
01059           dp3m.ks_mesh[ind]   = tmp0*dp3m.g_force[i];
01060           dp3m.ks_mesh[ind+1] = -tmp1*dp3m.g_force[i];
01061           ind += 2;
01062           i++;
01063         }
01064       }
01065     }
01066 
01067     /* Force component loop */
01068     for(d=0;d<3;d++) {       /* direction in k space: */
01069     d_rs = (d+dp3m.ks_pnum)%3;
01070     ind=0;
01071     for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) {             //j[0]=n_y
01072       for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) {    //j[1]=n_z
01073         for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) {  //j[2]=n_x
01074           tmp0 = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind];
01075           dp3m.rs_mesh_dip[0][ind] = dp3m.d_op[ j[2]+dfft.plan[3].start[2] ]*tmp0;
01076           dp3m.rs_mesh_dip[1][ind] = dp3m.d_op[ j[0]+dfft.plan[3].start[0] ]*tmp0;
01077           dp3m.rs_mesh_dip[2][ind] = dp3m.d_op[ j[1]+dfft.plan[3].start[1] ]*tmp0;
01078           ind++;
01079           tmp0 = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind];
01080           dp3m.rs_mesh_dip[0][ind] = dp3m.d_op[ j[2]+dfft.plan[3].start[2] ]*tmp0;
01081           dp3m.rs_mesh_dip[1][ind] = dp3m.d_op[ j[0]+dfft.plan[3].start[0] ]*tmp0;
01082           dp3m.rs_mesh_dip[2][ind] = dp3m.d_op[ j[1]+dfft.plan[3].start[1] ]*tmp0;
01083           ind++;
01084         }
01085       }
01086     }
01087       /* Back FFT force component mesh */
01088       dfft_perform_back(dp3m.rs_mesh_dip[0]);
01089       dfft_perform_back(dp3m.rs_mesh_dip[1]);
01090       dfft_perform_back(dp3m.rs_mesh_dip[2]);
01091       /* redistribute force component mesh */
01092       dp3m_spread_force_grid(dp3m.rs_mesh_dip[0]);
01093       dp3m_spread_force_grid(dp3m.rs_mesh_dip[1]);
01094       dp3m_spread_force_grid(dp3m.rs_mesh_dip[2]);
01095       /* Assign force component from mesh to particle */
01096       dp3m_assign_forces_dip(dipole_prefac*pow(2*PI/box_l[0],2), d_rs); 
01097    }
01098    
01099        P3M_TRACE(fprintf(stderr,"%d: dipolar p3m end forces calculation: k-Space\n",this_node));
01100 
01101    
01102  } /* of if (dp3m.sum_mu2>0 */
01103 } /* of if(force_flag) */
01104  
01105   if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) {
01106     surface_term = calc_surface_term(force_flag, energy_flag);
01107     if(this_node == 0)
01108       k_space_energy_dip += surface_term;
01109    }
01110 
01111 
01112   return k_space_energy_dip;
01113 }
01114 
01115 
01116 
01117 /************************************************************/
01118 
01119 double calc_surface_term(int force_flag, int energy_flag)
01120 {
01121  
01122   int np, c, i,ip=0,n_local_part=0;
01123   Particle *part;
01124   double pref =coulomb.Dprefactor*4*M_PI*box_l_i[0]*box_l_i[1]*box_l_i[2]/(2*dp3m.params.epsilon + 1);
01125   double suma,a[3];
01126   double en;
01127   double  *mx=NULL,*my=NULL,*mz=NULL;
01128 
01129      for (c = 0; c < local_cells.n; c++)
01130        n_local_part += local_cells.cell[c]->n;
01131 
01132      // We put all the dipolar momenta in a the arrays mx,my,mz according to the id-number of the particles   
01133      mx = (double *) malloc(sizeof(double)*n_local_part);
01134      my = (double *) malloc(sizeof(double)*n_local_part);
01135      mz = (double *) malloc(sizeof(double)*n_local_part);
01136     
01137      
01138      
01139      for (c = 0; c < local_cells.n; c++) {
01140        np   = local_cells.cell[c]->n;
01141        part = local_cells.cell[c]->part;
01142        for (i = 0; i < np; i++){
01143          mx[ip]=part[i].r.dip[0];
01144          my[ip]=part[i].r.dip[1];
01145          mz[ip]=part[i].r.dip[2];        
01146          ip++;
01147       }  
01148      } 
01149 
01150      // we will need the sum of all dipolar momenta vectors    
01151       a[0]=0.0;
01152       a[1]=0.0;
01153       a[2]=0.0;
01154 
01155       for (i = 0; i < n_local_part; i++){
01156          a[0]+=mx[i];
01157          a[1]+=my[i];
01158          a[2]+=mz[i];
01159       }   
01160   
01161       MPI_Allreduce(MPI_IN_PLACE, a, 3, MPI_DOUBLE, MPI_SUM, comm_cart);
01162      
01163      if (energy_flag) {
01164       
01165         suma=0.0;
01166         for (i = 0; i < n_local_part; i++){
01167               suma+=mx[i]*a[0]+my[i]*a[1]+mz[i]*a[2];
01168         }          
01169         MPI_Allreduce(MPI_IN_PLACE, &suma, 1, MPI_DOUBLE, MPI_SUM, comm_cart);
01170         en = 0.5*pref*suma;
01171        
01172      } else {
01173         en = 0;
01174      } 
01175      #ifdef ROTATION                 
01176      if (force_flag) {
01177           //fprintf(stderr," number of particles= %d ",n_total_particles);   
01178 
01179           double *sumix = (double *) malloc(sizeof(double)*n_local_part);
01180           double *sumiy = (double *) malloc(sizeof(double)*n_local_part);
01181           double *sumiz = (double *) malloc(sizeof(double)*n_local_part);
01182           
01183           for (i = 0; i < n_local_part; i++){
01184             sumix[i]=my[i]*a[2]-mz[i]*a[1];
01185             sumiy[i]=mz[i]*a[0]-mx[i]*a[2];
01186             sumiz[i]=mx[i]*a[1]-my[i]*a[0];
01187           }
01188             
01189          // for (i = 0; i < n_total_particles; i++){
01190          //    fprintf(stderr,"part %d, correccions torque  x:%le, y:%le, z:%le\n",i,sumix[i],sumiy[i],sumiz[i]);
01191          // }
01192               
01193           ip=0;
01194           for (c = 0; c < local_cells.n; c++) {
01195              np = local_cells.cell[c]->n;
01196              part = local_cells.cell[c]->part;
01197              for (i = 0; i < np; i++){
01198                 part[i].f.torque[0] -= pref*sumix[ip];
01199                 part[i].f.torque[1] -= pref*sumiy[ip];
01200                 part[i].f.torque[2] -= pref*sumiz[ip];
01201                 ip++;
01202              }  
01203           }
01204           
01205              
01206           free(sumix);     
01207           free(sumiy);     
01208           free(sumiz);     
01209      }
01210     #endif
01211 
01212     free(mx);    
01213     free(my);    
01214     free(mz);    
01215             
01216   return en;
01217  
01218 }
01219 
01220 
01221 /************************************************************/
01222 void dp3m_gather_fft_grid(double* themesh)
01223 {
01224   int s_dir,r_dir,evenodd;
01225   MPI_Status status;
01226   double *tmp_ptr;
01227 
01228   P3M_TRACE(fprintf(stderr,"%d: dp3m_gather_fft_grid:\n",this_node));
01229 
01230   /* direction loop */
01231   for(s_dir=0; s_dir<6; s_dir++) {
01232     if(s_dir%2==0) r_dir = s_dir+1;
01233     else           r_dir = s_dir-1;
01234     /* pack send block */ 
01235     if(dp3m.sm.s_size[s_dir]>0) 
01236       fft_pack_block(themesh, dp3m.send_grid, dp3m.sm.s_ld[s_dir], dp3m.sm.s_dim[s_dir], dp3m.local_mesh.dim, 1);
01237       
01238     /* communication */
01239     if(node_neighbors[s_dir] != this_node) {
01240       for(evenodd=0; evenodd<2;evenodd++) {
01241         if((node_pos[s_dir/2]+evenodd)%2==0) {
01242           if(dp3m.sm.s_size[s_dir]>0) 
01243             MPI_Send(dp3m.send_grid, dp3m.sm.s_size[s_dir], MPI_DOUBLE, 
01244                      node_neighbors[s_dir], REQ_P3M_GATHER_D, comm_cart);
01245         }
01246         else {
01247           if(dp3m.sm.r_size[r_dir]>0) 
01248             MPI_Recv(dp3m.recv_grid, dp3m.sm.r_size[r_dir], MPI_DOUBLE, 
01249                      node_neighbors[r_dir], REQ_P3M_GATHER_D, comm_cart, &status);          
01250         }
01251       }
01252     }
01253     else {
01254       tmp_ptr = dp3m.recv_grid;
01255       dp3m.recv_grid = dp3m.send_grid;
01256       dp3m.send_grid = tmp_ptr;
01257     }
01258     /* add recv block */
01259     if(dp3m.sm.r_size[r_dir]>0) {
01260       p3m_add_block(dp3m.recv_grid, themesh, dp3m.sm.r_ld[r_dir], dp3m.sm.r_dim[r_dir], dp3m.local_mesh.dim); 
01261     }
01262   }
01263 }
01264 
01265 
01266 
01267 /************************************************************/
01268 
01269 
01270 void dp3m_spread_force_grid(double* themesh)
01271 {
01272   int s_dir,r_dir,evenodd;
01273   MPI_Status status;
01274   double *tmp_ptr;
01275   P3M_TRACE(fprintf(stderr,"%d: dipolar p3m_spread_force_grid:\n",this_node));
01276 
01277   /* direction loop */
01278   for(s_dir=5; s_dir>=0; s_dir--) {
01279     if(s_dir%2==0) r_dir = s_dir+1;
01280     else           r_dir = s_dir-1;
01281     /* pack send block */ 
01282     if(dp3m.sm.s_size[s_dir]>0) 
01283       fft_pack_block(themesh, dp3m.send_grid, dp3m.sm.r_ld[r_dir], dp3m.sm.r_dim[r_dir], dp3m.local_mesh.dim, 1);
01284     /* communication */
01285     if(node_neighbors[r_dir] != this_node) {
01286       for(evenodd=0; evenodd<2;evenodd++) {
01287         if((node_pos[r_dir/2]+evenodd)%2==0) {
01288           if(dp3m.sm.r_size[r_dir]>0) 
01289             MPI_Send(dp3m.send_grid, dp3m.sm.r_size[r_dir], MPI_DOUBLE, 
01290                      node_neighbors[r_dir], REQ_P3M_SPREAD_D, comm_cart);
01291         }
01292         else {
01293           if(dp3m.sm.s_size[s_dir]>0) 
01294             MPI_Recv(dp3m.recv_grid, dp3m.sm.s_size[s_dir], MPI_DOUBLE, 
01295                      node_neighbors[s_dir], REQ_P3M_SPREAD_D, comm_cart, &status);          
01296         }
01297       }
01298     }
01299     else {
01300       tmp_ptr = dp3m.recv_grid;
01301       dp3m.recv_grid = dp3m.send_grid;
01302       dp3m.send_grid = tmp_ptr;
01303     }
01304     /* un pack recv block */
01305     if(dp3m.sm.s_size[s_dir]>0) {
01306       fft_unpack_block(dp3m.recv_grid, themesh, dp3m.sm.s_ld[s_dir], dp3m.sm.s_dim[s_dir], dp3m.local_mesh.dim, 1); 
01307     }
01308   }
01309 }
01310 
01311 
01312 /*****************************************************************************/
01313 
01314 void dp3m_realloc_ca_fields(int newsize)
01315 {
01316   newsize = ((newsize + CA_INCREMENT - 1)/CA_INCREMENT)*CA_INCREMENT;
01317   if (newsize == dp3m.ca_num) return;
01318   if (newsize < CA_INCREMENT) newsize = CA_INCREMENT;
01319 
01320    P3M_TRACE(fprintf(stderr,"%d: p3m_realloc_ca_fields: dipolar,  old_size=%d -> new_size=%d\n",this_node,dp3m.ca_num,newsize));
01321    dp3m.ca_num = newsize;
01322    dp3m.ca_frac = (double *)realloc(dp3m.ca_frac, dp3m.params.cao3*dp3m.ca_num*sizeof(double));
01323    dp3m.ca_fmp  = (int *)realloc(dp3m.ca_fmp, dp3m.ca_num*sizeof(int));
01324   
01325 }
01326 
01327 
01328 /*****************************************************************************/
01329 
01330 
01331 void dp3m_calc_meshift(void)
01332 {
01333   int i;
01334   double dmesh;
01335      dmesh = (double)dp3m.params.mesh[0];
01336      dp3m.meshift = (double *) realloc(dp3m.meshift, dp3m.params.mesh[0]*sizeof(double));
01337      for (i=0; i<dp3m.params.mesh[0]; i++) dp3m.meshift[i] = i - dround(i/dmesh)*dmesh;   
01338 }
01339 
01340 
01341 
01342 /*****************************************************************************/
01343 
01344 
01345 void dp3m_calc_differential_operator()
01346 {
01347   int i;
01348   double dmesh;
01349 
01350   dmesh = (double)dp3m.params.mesh[0];
01351   dp3m.d_op = (double *) realloc(dp3m.d_op, dp3m.params.mesh[0]*sizeof(double));
01352 
01353   for (i=0; i<dp3m.params.mesh[0]; i++) 
01354     dp3m.d_op[i] = (double)i - dround((double)i/dmesh)*dmesh;
01355 
01356     dp3m.d_op[dp3m.params.mesh[0]/2] = 0;
01357 }
01358 
01359 /*****************************************************************************/
01360 
01361 
01362 void dp3m_calc_influence_function_force()
01363 {
01364   int i,n[3],ind;
01365   int end[3];
01366   int size=1;
01367   double fak1,fak2;
01368   double nominator[1]={0.0},denominator=0.0;
01369 
01370   dp3m_calc_meshift();
01371 
01372   for(i=0;i<3;i++) {
01373     size *= dfft.plan[3].new_mesh[i];
01374     end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i];
01375   }
01376   dp3m.g_force = (double *) realloc(dp3m.g_force, size*sizeof(double));
01377   fak1  = dp3m.params.mesh[0]*dp3m.params.mesh[0]*dp3m.params.mesh[0]*2.0/(box_l[0]*box_l[0]);
01378 
01379   for(n[0]=dfft.plan[3].start[0]; n[0]<end[0]; n[0]++)
01380     for(n[1]=dfft.plan[3].start[1]; n[1]<end[1]; n[1]++)
01381       for(n[2]=dfft.plan[3].start[2]; n[2]<end[2]; n[2]++) {
01382         ind = (n[2]-dfft.plan[3].start[2]) + dfft.plan[3].new_mesh[2] * ((n[1]-dfft.plan[3].start[1]) + (dfft.plan[3].new_mesh[1]*(n[0]-dfft.plan[3].start[0])));
01383 
01384         if( (n[0]==0) && (n[1]==0) && (n[2]==0) )
01385           dp3m.g_force[ind] = 0.0;
01386         else if( (n[0]%(dp3m.params.mesh[0]/2)==0) &&
01387                  (n[1]%(dp3m.params.mesh[0]/2)==0) &&
01388                  (n[2]%(dp3m.params.mesh[0]/2)==0) )
01389           dp3m.g_force[ind] = 0.0;
01390         else {
01391           denominator = dp3m_perform_aliasing_sums_force(n,nominator);
01392           fak2 =  nominator[0];
01393           fak2 /= pow(SQR(dp3m.d_op[n[0]])+SQR(dp3m.d_op[n[1]])+SQR(dp3m.d_op[n[2]]),3)  * SQR(denominator) ;
01394           dp3m.g_force[ind] = fak1*fak2;
01395         }
01396       }
01397 }
01398 
01399 
01400 /*****************************************************************************/
01401 
01402 double dp3m_perform_aliasing_sums_force(int n[3], double nominator[1])
01403 {
01404   double denominator=0.0;
01405   /* lots of temporary variables... */
01406   double sx,sy,sz,f1,f2,f3,mx,my,mz,nmx,nmy,nmz,nm2,expo;
01407   double limit = 30;
01408   double n_nm;
01409   double n_nm3;
01410 
01411   nominator[0]=0.0;
01412   
01413   f1 = 1.0/(double)dp3m.params.mesh[0];
01414   f2 = SQR(PI/(dp3m.params.alpha_L));
01415 
01416   for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) {
01417     nmx = dp3m.meshift[n[0]] + dp3m.params.mesh[0]*mx;
01418     sx  = pow(sinc(f1*nmx),2.0*dp3m.params.cao);
01419     for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) {
01420       nmy = dp3m.meshift[n[1]] + dp3m.params.mesh[0]*my;
01421       sy  = sx*pow(sinc(f1*nmy),2.0*dp3m.params.cao);
01422       for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) {
01423         nmz = dp3m.meshift[n[2]] + dp3m.params.mesh[0]*mz;
01424         sz  = sy*pow(sinc(f1*nmz),2.0*dp3m.params.cao);
01425         
01426         nm2          =  SQR(nmx)+SQR(nmy)+SQR(nmz);
01427         expo         =  f2*nm2;
01428         f3           =  (expo<limit) ? sz*exp(-expo)/nm2 : 0.0;
01429 
01430         n_nm = dp3m.d_op[n[0]]*nmx + dp3m.d_op[n[1]]*nmy + dp3m.d_op[n[2]]*nmz;
01431         n_nm3 = n_nm*n_nm*n_nm; 
01432         
01433         nominator[0] += f3*n_nm3;
01434         denominator  += sz;
01435       }
01436     }
01437   }
01438   return denominator;
01439 }
01440 
01441 
01442 /*****************************************************************************/
01443 
01444 void dp3m_calc_influence_function_energy()
01445 {
01446   int i,n[3],ind;
01447   int end[3];
01448   int size=1;
01449   double fak1,fak2;
01450   double nominator[1]={0.0},denominator=0.0;
01451 
01452   dp3m_calc_meshift();
01453 
01454   for(i=0;i<3;i++) {
01455     size *= dfft.plan[3].new_mesh[i];
01456     end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i];
01457   }
01458   dp3m.g_energy = (double *) realloc(dp3m.g_energy, size*sizeof(double));
01459   fak1  = dp3m.params.mesh[0]*dp3m.params.mesh[0]*dp3m.params.mesh[0]*2.0/(box_l[0]*box_l[0]);
01460 
01461   for(n[0]=dfft.plan[3].start[0]; n[0]<end[0]; n[0]++)
01462     for(n[1]=dfft.plan[3].start[1]; n[1]<end[1]; n[1]++)
01463       for(n[2]=dfft.plan[3].start[2]; n[2]<end[2]; n[2]++) {
01464         ind = (n[2]-dfft.plan[3].start[2]) + dfft.plan[3].new_mesh[2] * ((n[1]-dfft.plan[3].start[1]) + (dfft.plan[3].new_mesh[1]*(n[0]-dfft.plan[3].start[0])));
01465 
01466         if( (n[0]==0) && (n[1]==0) && (n[2]==0) )
01467           dp3m.g_energy[ind] = 0.0;
01468         else if( (n[0]%(dp3m.params.mesh[0]/2)==0) &&
01469                  (n[1]%(dp3m.params.mesh[0]/2)==0) &&
01470                  (n[2]%(dp3m.params.mesh[0]/2)==0) )
01471           dp3m.g_energy[ind] = 0.0;
01472         else {
01473           denominator = dp3m_perform_aliasing_sums_energy(n,nominator);
01474           fak2 =  nominator[0];
01475           fak2 /= pow(SQR(dp3m.d_op[n[0]])+SQR(dp3m.d_op[n[1]])+SQR(dp3m.d_op[n[2]]),2)  * SQR(denominator) ;
01476           dp3m.g_energy[ind] = fak1*fak2;
01477         }
01478       }
01479 }
01480 
01481 /*****************************************************************************/
01482 
01483 double dp3m_perform_aliasing_sums_energy(int n[3], double nominator[1])
01484 { 
01485   double denominator=0.0;
01486   /* lots of temporary variables... */
01487   double sx,sy,sz,f1,f2,f3,mx,my,mz,nmx,nmy,nmz,nm2,expo;
01488   double limit = 30;
01489   double n_nm;
01490   double n_nm2;
01491 
01492   nominator[0]=0.0;
01493     
01494   f1 = 1.0/(double)dp3m.params.mesh[0];
01495   f2 = SQR(PI/(dp3m.params.alpha_L));
01496 
01497   for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) {
01498     nmx = dp3m.meshift[n[0]] + dp3m.params.mesh[0]*mx;
01499     sx  = pow(sinc(f1*nmx),2.0*dp3m.params.cao);
01500     for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) {
01501       nmy = dp3m.meshift[n[1]] + dp3m.params.mesh[0]*my;
01502       sy  = sx*pow(sinc(f1*nmy),2.0*dp3m.params.cao);
01503       for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) {
01504         nmz = dp3m.meshift[n[2]] + dp3m.params.mesh[0]*mz;
01505         sz  = sy*pow(sinc(f1*nmz),2.0*dp3m.params.cao);
01506         
01507         nm2          =  SQR(nmx)+SQR(nmy)+SQR(nmz);
01508         expo         =  f2*nm2;
01509         f3           =  (expo<limit) ? sz*exp(-expo)/nm2 : 0.0;
01510 
01511         n_nm = dp3m.d_op[n[0]]*nmx + dp3m.d_op[n[1]]*nmy + dp3m.d_op[n[2]]*nmz;
01512         n_nm2 = n_nm*n_nm; 
01513         nominator[0] += f3*n_nm2;
01514         denominator  += sz;
01515       }
01516     }
01517   }
01518   return denominator;
01519 }
01520 
01521 
01522 /*****************************************************************************/
01523 
01524 
01525 /************************************************
01526  * Functions for dipoloar P3M Parameter tuning
01527  * This tuning is based on the P3M tuning of the charges
01528  which in turn is based on the P3M_tune by M. Deserno
01529  ************************************************/
01530 
01531 #define P3M_TUNE_MAX_CUTS 50
01532 /** Tune dipolar P3M parameters to desired accuracy.
01533 
01534     Usage:
01535     \verbatim inter dipolar <bjerrum> p3m tune accuracy <value> [r_cut <value> mesh <value> cao <value>] \endverbatim
01536 
01537     The parameters are tuned to obtain the desired accuracy in best
01538     time, by running mpi_integrate(0) for several parameter sets.
01539 
01540     The function utilizes the analytic expression of the error estimate 
01541     for the dipolar P3M method see JCP,2008 paper by J.J.Cerda et al in 
01542     order to obtain the rms error in the force for a system of N randomly 
01543     distributed particles in a cubic box.
01544     For the real space error the estimate of Kolafa/Perram is used. 
01545 
01546     Parameter range if not given explicit values: For \ref p3m_parameter_struct::r_cut_iL
01547     the function uses the values (\ref min_local_box_l -\ref #skin) /
01548     (n * \ref box_l), n being an integer (this implies the assumption that \ref
01549     p3m_parameter_struct::r_cut_iL is the largest cutoff in the system!). For \ref
01550     p3m_parameter_struct::mesh the function uses the two values which matches best the
01551     equation: number of mesh point = number of magnetic dipolar particles. For
01552     \ref p3m_parameter_struct::cao the function considers all possible values.
01553 
01554     For each setting \ref p3m_parameter_struct::alpha_L is calculated assuming that the
01555     error contributions of real and reciprocal space should be equal.
01556 
01557     After checking if the total error fulfils the accuracy goal the
01558     time needed for one force calculation (including verlet list
01559     update) is measured via \ref mpi_integrate (0).
01560 
01561     The function returns a log of the performed tuning.
01562 
01563     The function is based on routines for charges.
01564  */
01565 
01566 
01567 /*****************************************************************************/
01568 
01569 
01570 /** get the minimal error for this combination of parameters. In fact, the real space error is tuned such that it
01571     contributes half of the total error, and then the Fourier space error is calculated. Returns the error and the
01572     optimal alpha, or 0 if this combination does not work at all */
01573 double dp3m_get_accuracy(int mesh, int cao, double r_cut_iL, double *_alpha_L, double *_rs_err, double *_ks_err)
01574 {
01575   double rs_err, ks_err;
01576   double alpha_L;
01577   P3M_TRACE(fprintf(stderr, "dp3m_get_accuracy: mesh %d, cao %d, r_cut %f ", mesh, cao, r_cut_iL));
01578 
01579   /* calc maximal real space error for setting */
01580 
01581     //Alpha cannot be zero in the dipolar case because real_space formula breaks down        
01582     //Idem of the previous function tclcommand_inter_magnetic_dp3m_print_tune_parameters, here we do nothing
01583     rs_err =P3M_DIPOLAR_real_space_error(box_l[0],coulomb.Dprefactor,r_cut_iL,dp3m.sum_dip_part,dp3m.sum_mu2,0.001);
01584     
01585   
01586     if(M_SQRT2*rs_err > dp3m.params.accuracy) {
01587      /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
01588          alpha_L=dp3m_rtbisection(box_l[0],coulomb.Dprefactor,r_cut_iL,dp3m.sum_dip_part,dp3m.sum_mu2,
01589              0.0001*box_l[0],5.0*box_l[0],0.0001,dp3m.params.accuracy);
01590 
01591     }
01592 
01593   else
01594     /* even alpha=0 is ok, however, we cannot choose it since it kills the k-space error formula.
01595        Anyways, this very likely NOT the optimal solution */
01596     alpha_L = 0.1;
01597 
01598   *_alpha_L = alpha_L;
01599   /* calculate real space and k space error for this alpha_L */
01600 
01601     rs_err = P3M_DIPOLAR_real_space_error(box_l[0],coulomb.Dprefactor,r_cut_iL,dp3m.sum_dip_part,dp3m.sum_mu2,alpha_L);
01602     ks_err = dp3m_k_space_error(box_l[0],coulomb.Dprefactor,mesh,cao,dp3m.sum_dip_part,dp3m.sum_mu2,alpha_L);
01603 
01604   *_rs_err = rs_err;
01605   *_ks_err = ks_err;
01606   P3M_TRACE(fprintf(stderr, "dipolar tuning resulting: %f -> %f %f\n", alpha_L, rs_err, ks_err));
01607   return sqrt(SQR(rs_err)+SQR(ks_err));
01608 }
01609 
01610 
01611 /*****************************************************************************/
01612 
01613 /** get the optimal alpha and the corresponding computation time for fixed mesh, cao, r_cut and alpha */
01614 static double dp3m_mcr_time(int mesh, int cao, double r_cut_iL, double alpha_L)
01615 {
01616   /* rounded up 2000/n_charges timing force evaluations */
01617   int int_num = (1999 + dp3m.sum_dip_part)/dp3m.sum_dip_part;
01618   
01619   /* broadcast p3m parameters for test run */
01620   if (coulomb.Dmethod != DIPOLAR_P3M && coulomb.Dmethod != DIPOLAR_MDLC_P3M)
01621     coulomb.Dmethod = DIPOLAR_P3M;
01622   dp3m.params.r_cut_iL = r_cut_iL;
01623   dp3m.params.mesh[0]  = dp3m.params.mesh[1] = dp3m.params.mesh[2] = mesh;
01624   dp3m.params.cao      = cao;
01625   dp3m.params.alpha_L  = alpha_L;
01626   dp3m_scaleby_box_l();
01627   /* initialize p3m structures */
01628   mpi_bcast_coulomb_params();
01629   /* perform force calculation test */
01630   return time_force_calc(int_num);    
01631 }
01632 
01633 /*****************************************************************************/
01634 /** get the optimal alpha and the corresponding computation time for
01635     fixed mesh, cao. The r_cut is determined via a simple
01636     bisection. Returns -1 if the force evaluation does not work, -2 if
01637     there is no valid r_cut, and -3 if the charge assigment order is
01638     to large for this grid */
01639 static double dp3m_mc_time(char **log, int mesh, int cao,
01640                            double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL,
01641                            double *_alpha_L, double *_accuracy)
01642 {
01643   double int_time;
01644   double r_cut_iL;
01645   double rs_err, ks_err, mesh_size, k_cut;
01646   int i, n_cells;
01647   char b[3*ES_INTEGER_SPACE + 3*ES_DOUBLE_SPACE + 128];
01648 
01649   /* initial checks. */
01650   mesh_size = box_l[0]/(double)mesh;
01651   k_cut =  mesh_size*cao/2.0;
01652   P3M_TRACE(fprintf(stderr, "dp3m_mc_time: mesh=%d, cao=%d, rmin=%f, rmax=%f\n",
01653                     mesh, cao, r_cut_iL_min, r_cut_iL_max));
01654   if(cao >= mesh || k_cut >= dmin(min_box_l,min_local_box_l) - skin) {
01655     /* print result */
01656     sprintf(b,"%-4d %-3d  cao too large for this mesh\n", mesh, cao);
01657     *log = strcat_alloc(*log, b);
01658     return -3;
01659   }
01660 
01661   /* Either low and high boundary are equal (for fixed cut), or the low border is initially 0 and therefore
01662      has infinite error estimate, as required. Therefore if the high boundary fails, there is no possible r_cut */
01663   if ((*_accuracy = dp3m_get_accuracy(mesh, cao, r_cut_iL_max, _alpha_L, &rs_err, &ks_err)) > dp3m.params.accuracy) {
01664     /* print result */
01665     sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e accuracy not achieved\n",
01666             mesh, cao, r_cut_iL_max, *_alpha_L, *_accuracy, rs_err, ks_err);
01667     *log = strcat_alloc(*log, b);
01668     return -2;
01669   }
01670 
01671   for (;;) {
01672     P3M_TRACE(fprintf(stderr, "dp3m_mc_time: interval [%f,%f]\n", r_cut_iL_min, r_cut_iL_max));
01673     r_cut_iL = 0.5*(r_cut_iL_min + r_cut_iL_max);
01674 
01675     if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC)
01676       break;
01677 
01678     /* bisection */
01679     if (dp3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err) > dp3m.params.accuracy)
01680       r_cut_iL_min = r_cut_iL;
01681     else
01682       r_cut_iL_max = r_cut_iL;
01683   }
01684   /* final result is always the upper interval boundary, since only there
01685      we know that the desired minimal accuracy is obtained */
01686   *_r_cut_iL = r_cut_iL = r_cut_iL_max;
01687 
01688   /* check whether we are running P3M+DLC, and whether we leave a reasonable gap space */
01689   if (coulomb.Dmethod == DIPOLAR_MDLC_P3M) {
01690     char *errtxt = runtime_error(128);
01691     ERROR_SPRINTF(errtxt, "{dipolar P3M: tuning when dlc needs to be fixed} ");
01692   }
01693 
01694   /* check whether this radius is too large, so that we would use less cells than allowed */
01695   n_cells = 1;
01696   for (i = 0; i < 3; i++)
01697     n_cells *= (int)(floor(local_box_l[i]/(r_cut_iL*box_l[0] + skin)));
01698   if (n_cells < min_num_cells) {
01699     P3M_TRACE(fprintf(stderr, "dp3m_mc_time: mesh %d cao %d r_cut %f reject n_cells %d\n", mesh, cao, r_cut_iL, n_cells));
01700     /* print result */
01701     sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e radius dangerously high\n\n",
01702             mesh, cao, r_cut_iL_max, *_alpha_L, *_accuracy, rs_err, ks_err);
01703     *log = strcat_alloc(*log, b);
01704     return -2;
01705   }
01706 
01707   int_time = dp3m_mcr_time(mesh, cao, r_cut_iL, *_alpha_L);
01708   if (int_time == -1) {
01709     *log = strcat_alloc(*log, "tuning failed, test integration not possible\n");
01710     return -1;
01711   }
01712 
01713   *_accuracy = dp3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err);
01714 
01715   P3M_TRACE(fprintf(stderr, "dp3m_mc_time: mesh %d cao %d r_cut %f time %f\n", mesh, cao, r_cut_iL, int_time));
01716   /* print result */
01717   sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e %-8d\n",
01718           mesh, cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err, (int)int_time);
01719   *log = strcat_alloc(*log, b);
01720   return int_time;
01721 }
01722 
01723 
01724 /*****************************************************************************/
01725 
01726 /** get the optimal alpha and the corresponding computation time for fixed mesh. *cao
01727     should contain an initial guess, which is then adapted by stepping up and down. Returns the time
01728     upon completion, -1 if the force evaluation does not work, and -2 if the accuracy cannot be met */
01729 static double dp3m_m_time(char **log, int mesh,
01730                           int cao_min, int cao_max, int *_cao,
01731                           double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL,
01732                           double *_alpha_L, double *_accuracy)
01733 {
01734   double best_time = -1, tmp_time, tmp_r_cut_iL, tmp_alpha_L=0.0, tmp_accuracy=0.0;
01735   /* in which direction improvement is possible. Initially, we dont know it yet. */
01736   int final_dir = 0;
01737   int cao = *_cao;
01738 
01739   P3M_TRACE(fprintf(stderr, "dp3m_m_time: Dmesh=%d, Dcao_min=%d, Dcao_max=%d, Drmin=%f, Drmax=%f\n",
01740                     mesh, cao_min, cao_max, r_cut_iL_min, r_cut_iL_max));
01741   /* the initial step sets a timing mark. If there is no valid r_cut, we can only try
01742      to increase cao to increase the obtainable precision of the far formula. */
01743   do {
01744     tmp_time = dp3m_mc_time(log, mesh, cao,  r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy);
01745     /* bail out if the force evaluation is not working */
01746     if (tmp_time == -1) return -1;
01747     /* cao is too large for this grid, but still the accuracy cannot be achieved, give up */
01748     if (tmp_time == -3) {
01749       P3M_TRACE(fprintf(stderr, "dp3m_m_time: no possible cao found\n"));
01750       return -2;
01751     }
01752     /* we have a valid time, start optimising from there */
01753     if (tmp_time >= 0) {
01754       best_time  = tmp_time;
01755       *_r_cut_iL = tmp_r_cut_iL;
01756       *_alpha_L  = tmp_alpha_L;
01757       *_accuracy = tmp_accuracy;
01758       *_cao      = cao;
01759       break;
01760     }
01761     /* the required accuracy could not be obtained, try higher caos. Therefore optimisation can only be
01762        obtained with even higher caos, but not lower ones */
01763     P3M_TRACE(fprintf(stderr, "tclcommand_inter_magnetic_p3m_print_m_time: doesn't give precision, step up\n"));
01764     cao++;
01765     final_dir = 1;
01766   }
01767   while (cao <= cao_max);
01768   /* with this mesh, the required accuracy cannot be obtained. */
01769   if (cao > cao_max) return -2;
01770 
01771   /* at the boundaries, only the opposite direction can be used for optimisation */
01772   if (cao == cao_min)      final_dir = 1;
01773   else if (cao == cao_max) final_dir = -1;
01774 
01775   P3M_TRACE(fprintf(stderr, "dp3m_m_time: final constraints dir %d\n", final_dir));
01776 
01777   if (final_dir == 0) {
01778     /* check in which direction we can optimise. Both directions are possible */
01779     double dir_times[3];
01780     for (final_dir = -1; final_dir <= 1; final_dir += 2) {
01781       dir_times[final_dir + 1] = tmp_time =
01782         dp3m_mc_time(log, mesh, cao + final_dir,  r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy);
01783       /* bail out on errors, as usual */
01784       if (tmp_time == -1) return -1;
01785       /* in this direction, we cannot optimise, since we get into precision trouble */
01786       if (tmp_time < 0) continue;
01787 
01788       if (tmp_time < best_time) {
01789         best_time  = tmp_time;
01790         *_r_cut_iL = tmp_r_cut_iL;
01791         *_alpha_L  = tmp_alpha_L;
01792         *_accuracy = tmp_accuracy;
01793         *_cao      = cao + final_dir;
01794       }
01795     }
01796     /* choose the direction which was optimal, if any of the two */
01797     if      (dir_times[0] == best_time) { final_dir = -1; }
01798     else if (dir_times[2] == best_time) { final_dir = 1; }
01799     else {
01800       /* no improvement in either direction, however if one is only marginally worse, we can still try*/
01801       /* down is possible and not much worse, while up is either illegal or even worse */
01802       if ((dir_times[0] >= 0 && dir_times[0] < best_time + P3M_TIME_GRAN) &&
01803           (dir_times[2] < 0 || dir_times[2] > dir_times[0]))
01804         final_dir = -1;
01805       /* same for up */
01806       else if ((dir_times[2] >= 0 && dir_times[2] < best_time + P3M_TIME_GRAN) &&
01807                (dir_times[0] < 0 || dir_times[0] > dir_times[2]))
01808         final_dir = 1;
01809       else {
01810         /* really no chance for optimisation */
01811         P3M_TRACE(fprintf(stderr, "dp3m_m_time: Dmesh=%d final Dcao=%d time=%f\n",mesh, cao, best_time));
01812         return best_time;
01813       }
01814     }
01815     /* we already checked the initial cao and its neighbor */
01816     cao += 2*final_dir;
01817   }
01818   else {
01819     /* here some constraint is active, and we only checked the initial cao itself */
01820     cao += final_dir;
01821   }
01822 
01823   P3M_TRACE(fprintf(stderr, "dp3m_m_time: optimise in direction %d\n", final_dir));
01824 
01825   /* move cao into the optimisation direction until we do not gain anymore. */
01826   for (; cao >= cao_min && cao <= cao_max; cao += final_dir) {
01827     tmp_time = dp3m_mc_time(log, mesh, cao,  r_cut_iL_min, r_cut_iL_max,
01828                             &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy);
01829     /* bail out on errors, as usual */
01830     if (tmp_time == -1) return -1;
01831     /* if we cannot meet the precision anymore, give up */
01832     if (tmp_time < 0) break;
01833 
01834     if (tmp_time < best_time) {
01835       best_time  = tmp_time;
01836       *_r_cut_iL = tmp_r_cut_iL;
01837       *_alpha_L  = tmp_alpha_L;
01838       *_accuracy = tmp_accuracy;
01839       *_cao      = cao;
01840     }
01841     /* no hope of further optimisation */
01842     else if (tmp_time > best_time + P3M_TIME_GRAN)
01843       break;
01844   }
01845   P3M_TRACE(fprintf(stderr, "tclcommand_inter_magnetic_p3m_print_m_time: Dmesh=%d final Dcao=%d Dr_cut=%f time=%f\n",mesh, *_cao, *_r_cut_iL, best_time));
01846   return best_time;
01847 }
01848 
01849 
01850 /** Tuning of dipolar P3M. The algorithm
01851     basically determines the mesh, cao and then the real space cutoff, in this nested order.
01852 
01853     For each mesh, the cao optimal for the mesh tested previously is used as an initial guess,
01854     and the algorithm tries whether increasing or decreasing it leads to a better solution. This
01855     is efficient, since the optimal cao only changes little with the meshes in general.
01856 
01857     The real space cutoff for a given mesh and cao is determined via a bisection on the error estimate,
01858     which determines where the error estimate equals the required accuracy. Therefore the smallest 
01859     possible, i.e. fastest real space cutoff is determined.
01860 
01861     Both the search over mesh and cao stop to search in a specific direction once the computation time is
01862     significantly higher than the currently known optimum.
01863  */
01864 
01865 int dp3m_adaptive_tune(char **logger)
01866 {
01867   int    mesh_max,                   mesh     = -1, tmp_mesh;
01868   double r_cut_iL_min, r_cut_iL_max, r_cut_iL = -1, tmp_r_cut_iL=0.0;
01869   int    cao_min, cao_max,           cao      = -1, tmp_cao;
01870 
01871   double                             alpha_L  = -1, tmp_alpha_L=0.0;
01872   double                             accuracy = -1, tmp_accuracy=0.0;
01873   double                            time_best=1e20, tmp_time;
01874   char b[3*ES_INTEGER_SPACE + 3*ES_DOUBLE_SPACE + 128];
01875  
01876   P3M_TRACE(fprintf(stderr,"%d: dp3m_adaptive_tune\n",this_node));
01877 
01878   if (skin == -1) {
01879     *logger = strcat_alloc(*logger, "p3m cannot be tuned, since the skin is not yet set");
01880     return ES_ERROR;
01881   }
01882 
01883   /* preparation */
01884   mpi_bcast_event(P3M_COUNT_DIPOLES);
01885 
01886   /* Print Status */
01887   sprintf(b, "dipolar P3M tune parameters: Accuracy goal = %.5e\n", dp3m.params.accuracy);
01888   *logger = strcat_alloc(*logger, b);
01889   sprintf(b, "System: box_l = %.5e # charged part = %d Sum[q_i^2] = %.5e\n",
01890           box_l[0], dp3m.sum_dip_part, dp3m.sum_mu2);
01891   *logger = strcat_alloc(*logger, b);
01892 
01893   if (dp3m.sum_dip_part == 0) {
01894     *logger = strcat_alloc(*logger, "no dipolar particles in the system, cannot tune dipolar P3M");
01895     return ES_ERROR;
01896   }
01897   
01898   /* parameter ranges */
01899   if (dp3m.params.mesh[0] == 0 ) {
01900     double expo;
01901     expo = log(pow((double)dp3m.sum_dip_part,(1.0/3.0)))/log(2.0);  
01902 
01903     tmp_mesh = (int)(pow(2.0,(double)((int)expo))+0.1);
01904     /* this limits the tried meshes if the accuracy cannot
01905        be obtained with smaller meshes, but normally not all these
01906        meshes have to be tested */
01907     mesh_max = tmp_mesh * 256;
01908     /* avoid using more than 1 GB of FFT arrays (per default, see config.h) */
01909     if (mesh_max > P3M_MAX_MESH)
01910       mesh_max = P3M_MAX_MESH;
01911   }
01912   else {
01913     tmp_mesh = mesh_max = dp3m.params.mesh[0];
01914 
01915     sprintf(b, "fixed mesh %d\n", dp3m.params.mesh[0]);
01916     *logger = strcat_alloc(*logger, b);
01917   }
01918 
01919   if(dp3m.params.r_cut_iL == 0.0) {
01920     r_cut_iL_min = 0;
01921     r_cut_iL_max = dmin(min_local_box_l, min_box_l/2) - skin;
01922     r_cut_iL_min *= box_l_i[0];
01923     r_cut_iL_max *= box_l_i[0];
01924   }
01925   else {
01926     r_cut_iL_min = r_cut_iL_max = dp3m.params.r_cut_iL;
01927 
01928     sprintf(b, "fixed r_cut_iL %f\n", dp3m.params.r_cut_iL);
01929     *logger = strcat_alloc(*logger, b);
01930   }
01931 
01932   if(dp3m.params.cao == 0) {
01933     cao_min = 1;
01934     cao_max = 7;
01935     cao = 3;
01936   }
01937   else {
01938     cao_min = cao_max = cao = dp3m.params.cao;
01939 
01940     sprintf(b, "fixed cao %d\n", dp3m.params.cao);
01941     *logger = strcat_alloc(*logger, b);
01942   }
01943 
01944   *logger = strcat_alloc(*logger, "Dmesh cao Dr_cut_iL   Dalpha_L     Derr         Drs_err    Dks_err    time [ms]\n");
01945 
01946   /* mesh loop */
01947   for (;tmp_mesh <= mesh_max; tmp_mesh *= 2) {
01948     tmp_cao = cao;
01949     tmp_time = dp3m_m_time(logger, tmp_mesh,
01950                            cao_min, cao_max, &tmp_cao,
01951                            r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL,
01952                            &tmp_alpha_L, &tmp_accuracy);
01953     /* some error occured during the tuning force evaluation */
01954     if (tmp_time == -1) return ES_ERROR;
01955     /* this mesh does not work at all */
01956     if (tmp_time < 0) continue;
01957 
01958     /* the optimum r_cut for this mesh is the upper limit for higher meshes,
01959        everything else is slower */
01960     r_cut_iL_max = tmp_r_cut_iL;
01961 
01962     /* new optimum */
01963     if (tmp_time < time_best) {
01964       time_best = tmp_time;
01965       mesh      = tmp_mesh;
01966       cao       = tmp_cao;
01967       r_cut_iL  = tmp_r_cut_iL;
01968       alpha_L   = tmp_alpha_L;
01969       accuracy  = tmp_accuracy;
01970     }
01971     /* no hope of further optimisation */
01972     else if (tmp_time > time_best + P3M_TIME_GRAN)
01973       break;
01974   }
01975   
01976   P3M_TRACE(fprintf(stderr,"finshed tuning\n"));
01977   if(time_best == 1e20) {
01978     *logger = strcat_alloc(*logger, "failed to tune dipolar P3M parameters to required accuracy\n");
01979     return ES_ERROR;
01980   }
01981 
01982   /* set tuned p3m parameters */
01983   dp3m.params.r_cut_iL = r_cut_iL;
01984   dp3m.params.mesh[0]  = dp3m.params.mesh[1] = dp3m.params.mesh[2] = mesh;
01985   dp3m.params.cao      = cao;
01986   dp3m.params.alpha_L  = alpha_L;
01987   dp3m.params.accuracy = accuracy;
01988   dp3m_scaleby_box_l();
01989   /* broadcast tuned p3m parameters */
01990   mpi_bcast_coulomb_params();
01991   /* Tell the user about the outcome */
01992   sprintf(b, "\nresulting parameters:\n%-4d %-3d %.5e %.5e %.5e %-8d\n",
01993           mesh, cao, r_cut_iL, alpha_L, accuracy, (int)time_best);
01994   *logger = strcat_alloc(*logger, b);
01995   return ES_OK;
01996 }
01997 
01998 /*****************************************************************************/
01999 
02000 void p3m_print_dp3m_struct(p3m_parameter_struct ps) {
02001   fprintf(stderr,"%d: dipolar p3m_parameter_struct: \n",this_node);
02002   fprintf(stderr,"   alpha_L=%f, r_cut_iL=%f \n",
02003           ps.alpha_L,ps.r_cut_iL);
02004   fprintf(stderr,"   mesh=(%d,%d,%d), mesh_off=(%.4f,%.4f,%.4f)\n",
02005           ps.mesh[0],ps.mesh[1],ps.mesh[2],
02006           ps.mesh_off[0],ps.mesh_off[1],ps.mesh_off[2]);
02007   fprintf(stderr,"   Dcao=%d, Dinter=%d, Depsilon=%f\n",
02008           ps.cao,ps.inter,ps.epsilon);
02009   fprintf(stderr,"   Dcao_cut=(%f,%f,%f)\n",
02010           ps.cao_cut[0],ps.cao_cut[1],ps.cao_cut[2]);
02011   fprintf(stderr,"   Da=(%f,%f,%f), Dai=(%f,%f,%f)\n",
02012           ps.a[0],ps.a[1],ps.a[2],ps.ai[0],ps.ai[1],ps.ai[2]);
02013 }
02014 
02015 /*****************************************************************************/
02016 
02017 void dp3m_count_magnetic_particles()
02018 {  
02019   Cell *cell;
02020   Particle *part;
02021   int i,c,np;
02022   double node_sums[2], tot_sums[2];
02023 
02024   for(i=0;i<2;i++)
02025     { node_sums[i]=0.0; tot_sums[i]=0.0;}
02026 
02027   for (c = 0; c < local_cells.n; c++) {
02028     cell = local_cells.cell[c];
02029     part = cell->part;
02030     np   = cell->n;
02031     for(i=0;i<np;i++) {
02032      if( part[i].p.dipm != 0.0 ) {
02033         node_sums[0] += SQR(part[i].r.dip[0])
02034                        +SQR(part[i].r.dip[1])
02035                        +SQR(part[i].r.dip[2]);
02036         node_sums[1] += 1.0;           
02037       }
02038     }
02039   }
02040   
02041   MPI_Allreduce(node_sums, tot_sums, 2, MPI_DOUBLE, MPI_SUM, comm_cart);
02042   dp3m.sum_mu2 = tot_sums[0];
02043   dp3m.sum_dip_part    = (int)(tot_sums[1]+0.1);  
02044 }
02045 
02046 
02047 
02048 /*****************************************************************************/
02049 
02050 /* Following are the two functions for computing the error in dipolar P3M and 
02051    tune the parameters to minimize the time with the desired accuracy.
02052    
02053    
02054    This functions are called by the functions: dp3m_get_accuracy() and
02055    tclcommand_inter_magnetic_dp3m_print_tune_parameters.
02056   
02057 */
02058 
02059 
02060 /*****************************************************************************/
02061 
02062 
02063 
02064    
02065 static double dp3m_k_space_error(double box_size, double prefac, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L)
02066 {
02067   int  nx, ny, nz;
02068   double he_q = 0.0, mesh_i = 1./mesh, alpha_L_i = 1./alpha_L;
02069   double alias1, alias2, n2, cs;
02070 
02071   for (nx=-mesh/2; nx<mesh/2; nx++)
02072     for (ny=-mesh/2; ny<mesh/2; ny++)
02073       for (nz=-mesh/2; nz<mesh/2; nz++)
02074         if((nx!=0) || (ny!=0) || (nz!=0)) {
02075           n2 = SQR(nx) + SQR(ny) + SQR(nz);
02076           cs = p3m_analytic_cotangent_sum(nx,mesh_i,cao)*
02077                p3m_analytic_cotangent_sum(ny,mesh_i,cao)*
02078                p3m_analytic_cotangent_sum(nz,mesh_i,cao);
02079           dp3m_tune_aliasing_sums(nx,ny,nz,mesh,mesh_i,cao,alpha_L_i,&alias1,&alias2);
02080           double d = alias1  -  SQR(alias2/cs) / (n2*n2*n2);
02081           /* at high precisions, d can become negative due to extinction;
02082              also, don't take values that have no significant digits left*/
02083           if (d > 0 && (fabs(d/alias1) > ROUND_ERROR_PREC))
02084             he_q += d;
02085         }
02086 
02087   return 8.*PI*PI/3.*sum_q2*sqrt(he_q/(double)n_c_part) / (box_size*box_size*box_size*box_size);
02088 }
02089    
02090 
02091 void dp3m_tune_aliasing_sums(int nx, int ny, int nz,  int mesh, double mesh_i, int cao, double alpha_L_i,  double *alias1, double *alias2)
02092 {
02093   int    mx,my,mz;
02094   double nmx,nmy,nmz;
02095   double fnmx,fnmy,fnmz;
02096 
02097   double ex,ex2,nm2,U2,factor1;
02098 
02099   factor1 = SQR(PI*alpha_L_i);
02100 
02101   *alias1 = *alias2 = 0.0;
02102   for (mx=-P3M_BRILLOUIN; mx<=P3M_BRILLOUIN; mx++) {
02103     fnmx = mesh_i * (nmx = nx + mx*mesh);
02104     for (my=-P3M_BRILLOUIN; my<=P3M_BRILLOUIN; my++) {
02105       fnmy = mesh_i * (nmy = ny + my*mesh);
02106       for (mz=-P3M_BRILLOUIN; mz<=P3M_BRILLOUIN; mz++) {
02107         fnmz = mesh_i * (nmz = nz + mz*mesh);
02108         
02109         nm2 = SQR(nmx) + SQR(nmy) + SQR(nmz);
02110         ex2 = SQR( ex = exp(-factor1*nm2) );
02111         
02112         U2 = pow(sinc(fnmx)*sinc(fnmy)*sinc(fnmz), 2.0*cao);
02113         
02114         *alias1 += ex2 * nm2;
02115         *alias2 += U2 * ex * pow((nx*nmx + ny*nmy + nz*nmz),3.) / nm2;
02116      }
02117     }
02118   }
02119 }
02120 
02121 
02122 
02123 
02124 
02125 //----------------------------------------------------------
02126 //  Function used to calculate the value of the errors
02127 //  for the REAL part of the force in terms of the Spliting parameter alpha of Ewald
02128 //  based on the formulas 33, the paper of Zuowei-HolmJCP, 115,6351,(2001).
02129 
02130 //  Please, notice than in this more refined approach we don't use
02131 //  formulas 37, but 33 which mantains all the powers in alpha
02132 
02133 //------------------------------------------------------------
02134 
02135 
02136 
02137    
02138  double P3M_DIPOLAR_real_space_error(double box_size, double prefac, double r_cut_iL,  int n_c_part, double sum_q2, double alpha_L)
02139 {
02140   double d_error_f,d_cc,d_dc,d_rcut2,d_con;
02141   double d_a2,d_c,d_RCUT;
02142  
02143    
02144    
02145    
02146   d_RCUT=r_cut_iL*box_size;
02147   d_rcut2=d_RCUT*d_RCUT;
02148   
02149   
02150  d_a2=alpha_L*alpha_L/(box_size*box_size);
02151 
02152  d_c =sum_q2*exp(-d_a2*d_RCUT*d_RCUT);
02153 
02154  d_cc=4.0*d_a2*d_a2*d_rcut2*d_rcut2+6.0*d_a2*d_rcut2+3.0;
02155 
02156 
02157  d_dc=8.0*d_a2*d_a2*d_a2*d_rcut2*d_rcut2*d_rcut2+20.0*d_a2*d_a2*d_rcut2*d_rcut2 \
02158       +30*d_a2*d_rcut2+15.0;
02159 
02160  d_con=1.0/sqrt(box_size*box_size*box_size*d_a2*d_a2*d_rcut2*d_rcut2*d_rcut2*d_rcut2*d_RCUT*(double)n_c_part);
02161 
02162 
02163  d_error_f=d_c*d_con*sqrt((13./6.)*d_cc*d_cc+(2./15.)*d_dc*d_dc-(13./15.)*d_cc*d_dc);
02164  
02165  
02166   return d_error_f;
02167 }
02168 
02169   
02170 
02171 /*****************************************************************************/
02172   
02173     
02174 // Using bisection find the root of a function "func-tuned_accuracy/sqrt(2.)" known to lie
02175 //between x1 and x2. The root, returned as rtbis, will be refined
02176 //until its accuracy is +-xacc.
02177 
02178 double dp3m_rtbisection( double box_size, double prefac, double r_cut_iL,  int n_c_part, double sum_q2,  double x1, double x2, double xacc, double tuned_accuracy)
02179 {
02180  int j;
02181  double dx,f,fmid,xmid,rtb,constant,JJ_RTBIS_MAX=40;
02182  
02183  constant=tuned_accuracy/sqrt(2.);
02184  
02185  
02186  f=P3M_DIPOLAR_real_space_error(box_size,prefac,r_cut_iL,n_c_part,sum_q2,       x1)-constant;
02187  fmid=P3M_DIPOLAR_real_space_error(box_size,prefac,r_cut_iL,n_c_part,sum_q2,    x2)-constant;
02188  if(f*fmid >=0.0) fprintf(stderr,"Root must be bracketed for bisection in dp3m_rtbisection \n");
02189  rtb = f < 0.0 ? (dx=x2-x1,x1) : (dx=x1-x2,x2);  // Orient the search dx, and set rtb to x1 or x2 ...
02190  for (j=1;j<=JJ_RTBIS_MAX;j++){
02191    fmid=P3M_DIPOLAR_real_space_error(box_size,prefac,r_cut_iL,n_c_part,sum_q2,  xmid=rtb+(dx *= 0.5))-constant;
02192    if(fmid<=0.0) rtb=xmid;
02193    if(fabs(dx) < xacc || fmid == 0.0) return rtb;
02194   }
02195   fprintf(stderr,"Too many bisections in JJ_rtbissection \n");
02196   return -9999999.9999;  
02197 
02198 }       
02199  
02200  
02201 
02202 /************************************************************/
02203 
02204 
02205 void dp3m_calc_lm_ld_pos() {
02206   int i; 
02207    for(i=0;i<3;i++) {
02208     dp3m.local_mesh.ld_pos[i] = (dp3m.local_mesh.ld_ind[i]+ dp3m.params.mesh_off[i])*dp3m.params.a[i];
02209   }
02210 }
02211 
02212 
02213 
02214 /************************************************************/
02215 
02216 
02217 void dp3m_init_a_ai_cao_cut() {
02218   int i; 
02219    for(i=0;i<3;i++) {
02220     dp3m.params.ai[i]      = (double)dp3m.params.mesh[i]/box_l[i]; 
02221     dp3m.params.a[i]       = 1.0/dp3m.params.ai[i];
02222     dp3m.params.cao_cut[i] = 0.5*dp3m.params.a[i]*dp3m.params.cao;
02223   }
02224 }
02225 
02226 
02227 
02228 /************************************************************/
02229 
02230 void dp3m_calc_local_ca_mesh() {
02231   int i;
02232   int ind[3];
02233   /* total skin size */
02234   double full_skin[3];
02235   
02236    for(i=0;i<3;i++)
02237     full_skin[i]= dp3m.params.cao_cut[i]+skin+dp3m.params.additional_mesh[i];
02238 
02239   /* inner left down grid point (global index) */
02240   for(i=0;i<3;i++) dp3m.local_mesh.in_ld[i] = (int)ceil(my_left[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i]);
02241   /* inner up right grid point (global index) */
02242   for(i=0;i<3;i++) dp3m.local_mesh.in_ur[i] = (int)floor(my_right[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i]);
02243   
02244   /* correct roundof errors at boundary */
02245   for(i=0;i<3;i++) {
02246     if((my_right[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i])-dp3m.local_mesh.in_ur[i]<ROUND_ERROR_PREC) dp3m.local_mesh.in_ur[i]--;
02247     if(1.0+(my_left[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i])-dp3m.local_mesh.in_ld[i]<ROUND_ERROR_PREC) dp3m.local_mesh.in_ld[i]--;
02248   }
02249   /* inner grid dimensions */
02250   for(i=0;i<3;i++) dp3m.local_mesh.inner[i] = dp3m.local_mesh.in_ur[i] - dp3m.local_mesh.in_ld[i] + 1;
02251   /* index of left down grid point in global mesh */
02252   for(i=0;i<3;i++) 
02253     dp3m.local_mesh.ld_ind[i]=(int)ceil((my_left[i]-full_skin[i])*dp3m.params.ai[i]-dp3m.params.mesh_off[i]);
02254   /* spacial position of left down mesh point */
02255   dp3m_calc_lm_ld_pos();
02256   /* left down margin */
02257   for(i=0;i<3;i++) dp3m.local_mesh.margin[i*2] = dp3m.local_mesh.in_ld[i]-dp3m.local_mesh.ld_ind[i];
02258   /* up right grid point */
02259   for(i=0;i<3;i++) ind[i]=(int)floor((my_right[i]+full_skin[i])*dp3m.params.ai[i]-dp3m.params.mesh_off[i]);
02260   /* correct roundof errors at up right boundary */
02261   for(i=0;i<3;i++)
02262     if(((my_right[i]+full_skin[i])*dp3m.params.ai[i]-dp3m.params.mesh_off[i])-ind[i]==0) ind[i]--;
02263   /* up right margin */
02264   for(i=0;i<3;i++) dp3m.local_mesh.margin[(i*2)+1] = ind[i] - dp3m.local_mesh.in_ur[i];
02265 
02266   /* grid dimension */
02267   dp3m.local_mesh.size=1; 
02268   for(i=0;i<3;i++) {dp3m.local_mesh.dim[i] = ind[i] - dp3m.local_mesh.ld_ind[i] + 1; dp3m.local_mesh.size*=dp3m.local_mesh.dim[i];}
02269   /* reduce inner grid indices from global to local */
02270   for(i=0;i<3;i++) dp3m.local_mesh.in_ld[i] = dp3m.local_mesh.margin[i*2];
02271   for(i=0;i<3;i++) dp3m.local_mesh.in_ur[i] = dp3m.local_mesh.margin[i*2]+dp3m.local_mesh.inner[i];
02272 
02273   dp3m.local_mesh.q_2_off  = dp3m.local_mesh.dim[2] - dp3m.params.cao;
02274   dp3m.local_mesh.q_21_off = dp3m.local_mesh.dim[2] * (dp3m.local_mesh.dim[1] - dp3m.params.cao);
02275    
02276 }
02277 
02278 /*****************************************************************************/
02279 
02280 
02281 int dp3m_sanity_checks_boxl() {
02282   char *errtxt;
02283   int i, ret = 0;
02284      for(i=0;i<3;i++) {
02285     /* check k-space cutoff */
02286     if(dp3m.params.cao_cut[i] >= 0.5*box_l[i]) {
02287       errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE);
02288       ERROR_SPRINTF(errtxt,"{039 dipolar P3M_init: k-space cutoff %g is larger than half of box dimension %g} ",dp3m.params.cao_cut[i],box_l[i]);
02289       ret = 1;
02290     }
02291     if(dp3m.params.cao_cut[i] >= local_box_l[i]) {
02292       errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE);
02293       ERROR_SPRINTF(errtxt,"{040 dipolar P3M_init: k-space cutoff %g is larger than local box dimension %g} ",dp3m.params.cao_cut[i],local_box_l[i]);
02294       ret = 1;
02295     }
02296   }
02297    return ret;
02298 }
02299 
02300 /*****************************************************************************/
02301 
02302 
02303 int dp3m_sanity_checks()
02304 {
02305   char *errtxt;
02306   int ret = 0;
02307 
02308   if (!PERIODIC(0) || !PERIODIC(1) || !PERIODIC(2)) {
02309     errtxt = runtime_error(128);
02310     ERROR_SPRINTF(errtxt, "{041 dipolar P3M requires periodicity 1 1 1} ");
02311     ret = 1;
02312   }
02313   /*
02314   if (n_nodes != 1) {
02315     errtxt = runtime_error(128);
02316     ERROR_SPRINTF(errtxt, "{110 dipolar P3M does not run in parallel} ");
02317     ret = 1;
02318   } */
02319   if (cell_structure.type != CELL_STRUCTURE_DOMDEC) {
02320     errtxt = runtime_error(128);
02321     ERROR_SPRINTF(errtxt, "{042 dipolar P3M at present requires the domain decomposition cell system} ");
02322     ret = 1;
02323   }
02324   
02325   if( (box_l[0] != box_l[1]) || (box_l[1] != box_l[2]) ) {
02326     errtxt = runtime_error(128);
02327     ERROR_SPRINTF(errtxt,"{043 dipolar P3M requires a cubic box} ");
02328     ret = 1;
02329   }
02330 
02331     if( (dp3m.params.mesh[0] != dp3m.params.mesh[1]) || (dp3m.params.mesh[1] != dp3m.params.mesh[2]) ) {
02332     errtxt = runtime_error(128);
02333     ERROR_SPRINTF(errtxt, "{044 dipolar P3M requires a cubic mesh} ");
02334     ret = 1;
02335   }
02336 
02337   if (dp3m_sanity_checks_boxl()) ret = 1;
02338 
02339   if (skin == -1) {
02340     errtxt = runtime_error(128);
02341     ERROR_SPRINTF(errtxt,"{047 dipolar P3M_init: skin is not yet set} ");
02342     ret = 1;
02343   }
02344   
02345   if( dp3m.params.mesh[0] == 0) {
02346     errtxt = runtime_error(128);
02347     ERROR_SPRINTF(errtxt,"{045 dipolar P3M_init: mesh size is not yet set} ");
02348     ret = 1;
02349   }
02350   if( dp3m.params.cao == 0) {
02351     errtxt = runtime_error(128);
02352     ERROR_SPRINTF(errtxt,"{046 dipolar P3M_init: cao is not yet set} ");
02353     ret = 1;
02354   }
02355   if(node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) {
02356     errtxt = runtime_error(128);
02357     ERROR_SPRINTF(errtxt,"{046 dipolar P3M_init: node grid must be sorted, largest first} ");
02358     ret = 1;
02359   }
02360   
02361   return ret;
02362 }
02363 
02364 
02365 /*****************************************************************************/
02366 
02367 
02368 void dp3m_calc_send_mesh()
02369 {
02370   int i,j,evenodd;
02371   int done[3]={0,0,0};
02372   MPI_Status status;
02373   /* send grids */
02374   for(i=0;i<3;i++) {
02375     for(j=0;j<3;j++) {
02376       /* left */
02377       dp3m.sm.s_ld[i*2][j] = 0 + done[j]*dp3m.local_mesh.margin[j*2];
02378       if(j==i) dp3m.sm.s_ur[i*2][j] = dp3m.local_mesh.margin[j*2]; 
02379       else     dp3m.sm.s_ur[i*2][j] = dp3m.local_mesh.dim[j]-done[j]*dp3m.local_mesh.margin[(j*2)+1];
02380       /* right */
02381       if(j==i) dp3m.sm.s_ld[(i*2)+1][j] = dp3m.local_mesh.in_ur[j];
02382       else     dp3m.sm.s_ld[(i*2)+1][j] = 0 + done[j]*dp3m.local_mesh.margin[j*2];
02383       dp3m.sm.s_ur[(i*2)+1][j] = dp3m.local_mesh.dim[j] - done[j]*dp3m.local_mesh.margin[(j*2)+1];
02384     }   
02385     done[i]=1;
02386   }
02387   dp3m.sm.max=0;
02388   for(i=0;i<6;i++) {
02389     dp3m.sm.s_size[i] = 1;
02390     for(j=0;j<3;j++) {
02391       dp3m.sm.s_dim[i][j] = dp3m.sm.s_ur[i][j]-dp3m.sm.s_ld[i][j];
02392       dp3m.sm.s_size[i] *= dp3m.sm.s_dim[i][j];
02393     }
02394     if(dp3m.sm.s_size[i]>dp3m.sm.max) dp3m.sm.max=dp3m.sm.s_size[i];
02395   }
02396   /* communication */
02397   for(i=0;i<6;i++) {
02398     if(i%2==0) j = i+1;
02399     else       j = i-1;
02400     if(node_neighbors[i] != this_node) {
02401       /* two step communication: first all even positions than all odd */
02402       for(evenodd=0; evenodd<2;evenodd++) {
02403         if((node_pos[i/2]+evenodd)%2==0)
02404           MPI_Send(&(dp3m.local_mesh.margin[i]), 1, MPI_INT, 
02405                    node_neighbors[i],REQ_P3M_INIT_D,comm_cart);
02406         else
02407           MPI_Recv(&(dp3m.local_mesh.r_margin[j]), 1, MPI_INT,
02408                    node_neighbors[j],REQ_P3M_INIT_D,comm_cart,&status);    
02409       }
02410     }
02411     else {
02412       dp3m.local_mesh.r_margin[j] = dp3m.local_mesh.margin[i];
02413     }
02414   }
02415   /* recv grids */
02416   for(i=0;i<3;i++) 
02417     for(j=0;j<3;j++) {
02418       if(j==i) {
02419         dp3m.sm.r_ld[ i*2   ][j] = dp3m.sm.s_ld[ i*2   ][j] + dp3m.local_mesh.margin[2*j];
02420         dp3m.sm.r_ur[ i*2   ][j] = dp3m.sm.s_ur[ i*2   ][j] + dp3m.local_mesh.r_margin[2*j];
02421         dp3m.sm.r_ld[(i*2)+1][j] = dp3m.sm.s_ld[(i*2)+1][j] - dp3m.local_mesh.r_margin[(2*j)+1];
02422         dp3m.sm.r_ur[(i*2)+1][j] = dp3m.sm.s_ur[(i*2)+1][j] - dp3m.local_mesh.margin[(2*j)+1];
02423       }
02424       else {
02425         dp3m.sm.r_ld[ i*2   ][j] = dp3m.sm.s_ld[ i*2   ][j];
02426         dp3m.sm.r_ur[ i*2   ][j] = dp3m.sm.s_ur[ i*2   ][j];
02427         dp3m.sm.r_ld[(i*2)+1][j] = dp3m.sm.s_ld[(i*2)+1][j];
02428         dp3m.sm.r_ur[(i*2)+1][j] = dp3m.sm.s_ur[(i*2)+1][j];
02429       }
02430     }
02431   for(i=0;i<6;i++) {
02432     dp3m.sm.r_size[i] = 1;
02433     for(j=0;j<3;j++) {
02434       dp3m.sm.r_dim[i][j] = dp3m.sm.r_ur[i][j]-dp3m.sm.r_ld[i][j];
02435       dp3m.sm.r_size[i] *= dp3m.sm.r_dim[i][j];
02436     }
02437     if(dp3m.sm.r_size[i]>dp3m.sm.max) dp3m.sm.max=dp3m.sm.r_size[i];
02438   }
02439   
02440   
02441 }
02442 
02443 
02444 
02445 /************************************************/
02446 
02447 void dp3m_scaleby_box_l() {
02448   if (coulomb.Dbjerrum == 0.0) {
02449     return;
02450   }
02451 
02452   dp3m.params.r_cut = dp3m.params.r_cut_iL* box_l[0];
02453   dp3m.params.alpha = dp3m.params.alpha_L * box_l_i[0];  
02454   dp3m_init_a_ai_cao_cut();
02455   dp3m_calc_lm_ld_pos();
02456   dp3m_sanity_checks_boxl();
02457 
02458   dp3m_calc_influence_function_force();
02459   dp3m_calc_influence_function_energy();
02460 }
02461 
02462 /*****************************************************************************/
02463  
02464  
02465 /* fucntion to give the dipolar-P3M energy  correction -------*/
02466 void dp3m_compute_constants_energy_dipolar() {
02467   double  Eself, Ukp3m;
02468 
02469   if (dp3m.energy_correction != 0.0)
02470     return;
02471       
02472   P3M_TRACE(fprintf(stderr, "%d: dp3m_compute_constants_energy_dipolar().\n", this_node));
02473       
02474   double volume=box_l[0]*box_l[1]*box_l[2];
02475   Ukp3m=dp3m_average_dipolar_self_energy(box_l[0],dp3m.params.mesh[0])*volume;
02476 
02477   P3M_TRACE(fprintf(stderr, "%d: Average Dipolar Energy = %lf.\n", this_node, Ukp3m));
02478              
02479   Eself=-(2*pow(dp3m.params.alpha_L,3) * wupii/3.0);
02480 
02481   dp3m.energy_correction = - dp3m.sum_mu2*(Ukp3m+Eself+2.*PI/3.);
02482 } 
02483 
02484 /*****************************************************************************/
02485 
02486 
02487 /*****************************************************************************/
02488 
02489 #endif /* DP3M */
02490