ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
mmm2d.c
Go to the documentation of this file.
00001 /*
00002   Copyright (C) 2010,2012,2013 The ESPResSo project
00003   Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
00004     Max-Planck-Institute for Polymer Research, Theory Group
00005   
00006   This file is part of ESPResSo.
00007   
00008   ESPResSo is free software: you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation, either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   ESPResSo is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
00020 */
00021 /** \file mmm2d.c  MMM2D algorithm for long range coulomb interaction.
00022  *
00023  *  For more information about MMM2D, see \ref mmm2d.h "mmm2d.h".
00024  */
00025 
00026 #include <math.h>
00027 #include <mpi.h>
00028 #include "utils.h"
00029 #include "communication.h"
00030 #include "grid.h"
00031 #include "particle_data.h"
00032 #include "interaction_data.h"
00033 #include "cells.h"
00034 #include "mmm2d.h"
00035 #include "mmm-common.h"
00036 #include "specfunc.h"
00037 #include "integrate.h"
00038 #include "layered.h"
00039 
00040 #ifdef ELECTROSTATICS
00041 char const *mmm2d_errors[] = {
00042    "ok",
00043    "Layer height too large for MMM2D near formula, increase n_layers",
00044    "box_l[1]/box_l[0] too large for MMM2D near formula, please exchange x and y",
00045    "Could find not reasonable Bessel cutoff. Please decrease n_layers or the error bound",
00046    "Could find not reasonable Polygamma cutoff. Consider exchanging x and y",
00047    "Far cutoff too large, decrease the error bound",
00048    "Layer height too small for MMM2D far formula, decrease n_layers or skin",
00049    "IC requires layered cellsystem with more than 3 layers",
00050 };
00051 
00052 /** if you define this, the Besselfunctions are calculated up
00053     to machine precision, otherwise 10^-14, which should be
00054     definitely enough for daily life. */
00055 #undef BESSEL_MACHINE_PREC
00056 
00057 // #define CHECKPOINTS
00058 #if 0
00059 #define LOG_FORCES(x) x
00060 #else
00061 #define LOG_FORCES(x)
00062 #endif
00063 
00064 #ifndef BESSEL_MACHINE_PREC
00065 #define K0 LPK0
00066 #define K1 LPK1
00067 #endif
00068 
00069 /****************************************
00070  * LOCAL DEFINES
00071  ****************************************/
00072 
00073 /** Largest reasonable cutoff for far formula. A double cannot overflow
00074     with this value. */
00075 #define MAXIMAL_FAR_CUT 100
00076 
00077 /** Largest reasonable cutoff for Bessel function. The Bessel functions
00078     are quite slow, so do not make too large. */
00079 #define MAXIMAL_B_CUT 50
00080 
00081 /** Largest reasonable order of polygamma series. These are pretty fast,
00082     so use more of them. Also, the real cutoff is determined at run time,
00083     so normally we are faster */
00084 #define MAXIMAL_POLYGAMMA 100
00085 
00086 /** internal relative precision of far formula. This controls how many
00087     p,q vectors are done at once. This has nothing to do with the effective
00088     precision, but rather controls how different values can be we add up without
00089     loosing the smallest values. In principle one could choose smaller values, but
00090     that would not make things faster */
00091 #define FARRELPREC 1e-6
00092 
00093 /** number of steps in the complex cutoff table */
00094 #define COMPLEX_STEP 16
00095 /** map numbers from 0 to 1/2 onto the complex cutoff table
00096     (with security margin) */
00097 #define COMPLEX_FAC (COMPLEX_STEP/(.5 + 0.01))
00098 
00099 /****************************************
00100  * LOCAL VARIABLES
00101  ****************************************/
00102 
00103 /** up to that error the sums in the NF are evaluated */
00104 static double part_error;
00105 
00106 /** cutoffs for the bessel sum */
00107 static IntList besselCutoff = {NULL, 0, 0};
00108 
00109 /** cutoffs for the complex sum */
00110 static int  complexCutoff[COMPLEX_STEP + 1];
00111 /** bernoulli numbers divided by n */
00112 static DoubleList  bon = {NULL, 0, 0};
00113 
00114 /** inverse box dimensions */
00115 /*@{*/
00116 static double ux, ux2, uy, uy2, uz;
00117 /*@}*/
00118 
00119 /** maximal z for near formula, minimal z for far formula.
00120     Is identical in the theory, but with the verlet tricks
00121     this is no longer true, the skin has to be added/subtracted */
00122 /*@{*/
00123 static double max_near, min_far;
00124 /*@}*/
00125 
00126 ///
00127 static double self_energy;
00128 
00129 MMM2D_struct mmm2d_params = { 1e100, 10, 1, 0, 0, 1, 1, 1 };
00130 
00131 /** return codes for \ref MMM2D_tune_near and \ref MMM2D_tune_far */
00132 /*@{*/
00133 /** cell too large */
00134 #define ERROR_LARGE 1
00135 /** box too large */
00136 #define ERROR_BOXL 2
00137 /** no reasonable bessel cutoff found */
00138 #define ERROR_BESSEL 3
00139 /** no reasonable polygamma cutoff found */
00140 #define ERROR_POLY 4
00141 /** no reasonable cutoff for the far formula found */
00142 #define ERROR_FARC 5
00143 /** cell too small */
00144 #define ERROR_SMALL 6
00145 /** IC layer requirement */
00146 #define ERROR_ICL 7
00147 /*@}*/
00148 
00149 /****************************************
00150  * LOCAL ARRAYS
00151  ****************************************/
00152 
00153 /** \name Product decomposition data organization
00154     For the cell blocks
00155     it is assumed that the lower blocks part is in the lower half.
00156     This has to have positive sign, so that has to be first. */
00157 /*@{*/
00158 
00159 #define POQESP 0
00160 #define POQECP 1
00161 #define POQESM 2
00162 #define POQECM 3
00163 
00164 #define PQESSP 0
00165 #define PQESCP 1
00166 #define PQECSP 2
00167 #define PQECCP 3
00168 #define PQESSM 4
00169 #define PQESCM 5
00170 #define PQECSM 6
00171 #define PQECCM 7
00172 
00173 #define QQEQQP 0
00174 #define QQEQQM 1
00175 
00176 #define ABEQQP 0
00177 #define ABEQZP 1
00178 #define ABEQQM 2
00179 #define ABEQZM 3
00180 
00181 /*@}*/
00182 
00183 /** number of local particles */
00184 static int n_localpart = 0;
00185 
00186 /** temporary buffers for product decomposition */
00187 static double *partblk = NULL;
00188 /** for all local cells including ghosts */
00189 static double *lclcblk = NULL;
00190 /** collected data from the cells above the top neighbor
00191     of a cell rsp. below the bottom neighbor
00192     (P=below, M=above, as the signs in the exp). */
00193 static double *gblcblk = NULL;
00194 
00195 /** contribution from the image charges */
00196 static double lclimge[8]; 
00197 
00198 typedef struct {
00199   double s, c;
00200 } SCCache;
00201 
00202 /** sin/cos caching */ 
00203 static SCCache *scxcache = NULL;
00204 static int    n_scxcache;  
00205 /** sin/cos caching */ 
00206 static SCCache *scycache = NULL;
00207 static int    n_scycache;  
00208 
00209 
00210 /** \name Local functions for the near formula */
00211 /************************************************************/
00212 /*@{*/
00213 
00214 /** complex evaluation */
00215 static void prepareBernoulliNumbers(int nmax);
00216 
00217 /** cutoff error setup. Returns error code */
00218 static int MMM2D_tune_near(double error);
00219 
00220 /** energy of all local particles with their copies */
00221 void MMM2D_self_energy();
00222 
00223 /*@}*/
00224 
00225 /** \name Local functions for the far formula */
00226 /************************************************************/
00227 /*@{*/
00228 
00229 /** sin/cos storage */
00230 static void prepare_scx_cache();
00231 static void prepare_scy_cache();
00232 /** clear the image contributions if there is no dielectric contrast and no image charges */
00233 static void clear_image_contributions(int size);
00234 /** gather the informations for the far away image charges */
00235 static void gather_image_contributions(int size);
00236 /** spread the top/bottom sums */
00237 static void distribute(int size, double fac);
00238 /** 2 pi |z| code */
00239 static void setup_z_force();
00240 static void setup_z_energy();
00241 static void   add_z_force();
00242 static double     z_energy();
00243 /** p=0 per frequency code */
00244 static void setup_P(int p, double omega, double fac);
00245 static void   add_P_force();
00246 static double     P_energy(double omega);
00247 /** q=0 per frequency code */
00248 static void setup_Q(int q, double omega, double fac);
00249 static void   add_Q_force();
00250 static double     Q_energy(double omega);
00251 /** p,q <> 0 per frequency code */
00252 static void setup_PQ(int p, int q, double omega, double fac);
00253 static void   add_PQ_force(int p, int q, double omega);
00254 static double     PQ_energy(double omega);
00255 
00256 /** cutoff error setup. Returns error code */
00257 static int MMM2D_tune_far(double error);
00258 
00259 /*@}*/
00260 
00261 ///
00262 void MMM2D_setup_constants()
00263 {
00264   ux  = 1/box_l[0];
00265   ux2 = ux*ux;
00266   uy  = 1/box_l[1];
00267   uy2 = uy*uy;  
00268   uz  = 1/box_l[2];
00269 
00270   switch (cell_structure.type) {
00271   case CELL_STRUCTURE_NSQUARE:
00272     max_near = box_l[2];
00273     /* not used */
00274     min_far = 0.0;
00275     break;
00276   case CELL_STRUCTURE_LAYERED:
00277     max_near = 2*layer_h + skin;
00278     min_far  =   layer_h - skin;    
00279     break;
00280   default:
00281     fprintf(stderr, "%d: INTERNAL ERROR: MMM2D setup for cell structure it should reject\n", this_node);
00282     errexit();
00283   }
00284 }
00285 
00286 /****************************************
00287  * FAR FORMULA
00288  ****************************************/
00289 
00290 static void prepare_scx_cache()
00291 {
00292   int np, c, i, ic, freq, o;
00293   double pref, arg;
00294   Particle *part;
00295   
00296   for (freq = 1; freq <= n_scxcache; freq++) {
00297     pref = C_2PI*ux*freq;
00298     o = (freq-1)*n_localpart;
00299     ic = 0;
00300     for (c = 1; c <= n_layers; c++) {
00301       np   = cells[c].n;
00302       part = cells[c].part;
00303       for (i = 0; i < np; i++) {
00304         arg = pref*part[i].r.p[0];
00305         scxcache[o + ic].s = sin(arg);
00306         scxcache[o + ic].c = cos(arg);
00307         ic++;
00308       }
00309     }
00310   }
00311 }
00312 
00313 static void prepare_scy_cache()
00314 {
00315   int np, c, i, ic, freq, o;
00316   double pref, arg;
00317   Particle *part;
00318   
00319   for (freq = 1; freq <= n_scycache; freq++) {
00320     pref = C_2PI*uy*freq;
00321     o = (freq-1)*n_localpart;
00322     ic = 0;
00323     for (c = 1; c <= n_layers; c++) {
00324       np   = cells[c].n;
00325       part = cells[c].part;
00326       for (i = 0; i < np; i++) {
00327         arg = pref*part[i].r.p[1];
00328         scycache[o + ic].s = sin(arg);
00329         scycache[o + ic].c = cos(arg);
00330         ic++;
00331       }
00332     }
00333   }
00334 }
00335 
00336 /*****************************************************************/
00337 /* data distribution */
00338 /*****************************************************************/
00339 
00340 /* vector operations */
00341 
00342 /** pdc = 0 */
00343 MDINLINE void clear_vec(double *pdc, int size)
00344 {
00345   int i;
00346   for (i = 0; i < size; i++)
00347     pdc[i] = 0;
00348 }
00349 
00350 /** pdc_d = pdc_s */
00351 MDINLINE void copy_vec(double *pdc_d, double *pdc_s, int size)
00352 {
00353   int i;
00354   for (i = 0; i < size; i++)
00355     pdc_d[i] = pdc_s[i];
00356 }
00357 
00358 /** pdc_d = pdc_s1 + pdc_s2 */
00359 MDINLINE void add_vec(double *pdc_d, double *pdc_s1, double *pdc_s2, int size)
00360 {
00361   int i;
00362   for (i = 0; i < size; i++)
00363     pdc_d[i] = pdc_s1[i] + pdc_s2[i];
00364 }
00365 
00366 /** pdc_d = scale*pdc_s1 + pdc_s2 */
00367 MDINLINE void addscale_vec(double *pdc_d, double scale, double *pdc_s1, double *pdc_s2, int size)
00368 {
00369   int i;
00370   for (i = 0; i < size; i++)
00371     pdc_d[i] = scale*pdc_s1[i] + pdc_s2[i];
00372 }
00373 
00374 /** pdc_d = scale*pdc */
00375 MDINLINE void scale_vec(double scale, double *pdc, int size)
00376 {
00377   int i;
00378   for (i = 0; i < size; i++)
00379     pdc[i] *= scale;
00380 }
00381 
00382 /* block indexing - has to fit to the PQ block definitions above.
00383    size gives the full size of the data block,
00384    e_size is the size of only the top or bottom half, i.e. half of size.
00385 */
00386 
00387 MDINLINE double *block(double *p, int index, int size)
00388 {
00389   return &p[index*size];
00390 }
00391 
00392 MDINLINE double *blwentry(double *p, int index, int e_size)
00393 {
00394   return &p[2*index*e_size];
00395 }
00396 
00397 MDINLINE double *abventry(double *p, int index, int e_size)
00398 {
00399   return &p[(2*index + 1)*e_size];
00400 }
00401 
00402 /* dealing with the image contributions from far outside the simulation box */
00403 
00404 void clear_image_contributions(int e_size)
00405 {
00406   if (this_node == 0)
00407     /* the gblcblk contains all contributions from layers deeper than one layer below our system,
00408        which is precisely what the gblcblk should contain for the lowest layer. */
00409     clear_vec(blwentry(gblcblk, 0, e_size), e_size);
00410 
00411   if (this_node == n_nodes - 1)
00412     /* same for the top node */
00413     clear_vec(abventry(gblcblk, n_layers - 1, e_size), e_size);
00414 }
00415 
00416 void gather_image_contributions(int e_size)
00417 {
00418   double recvbuf[8];
00419 
00420   /* collect the image charge contributions with at least a layer distance */
00421   MPI_Allreduce(lclimge, recvbuf, 2*e_size, MPI_DOUBLE, MPI_SUM, comm_cart);
00422 
00423   if (this_node == 0)
00424     /* the gblcblk contains all contributions from layers deeper than one layer below our system,
00425        which is precisely what the gblcblk should contain for the lowest layer. */
00426     copy_vec(blwentry(gblcblk, 0, e_size),recvbuf, e_size);
00427 
00428   if (this_node == n_nodes - 1)
00429     /* same for the top node */
00430     copy_vec(abventry(gblcblk, n_layers - 1, e_size), recvbuf + e_size, e_size);
00431 }
00432 
00433 /* the data transfer routine for the lclcblks itself */
00434 void distribute(int e_size, double fac)
00435 {
00436   int c, node, inv_node;
00437   double sendbuf[8];
00438   double recvbuf[8];
00439   MPI_Status status;
00440 
00441   /* send/recv to/from other nodes. Also builds up the gblcblk. */
00442   for (node = 0; node < n_nodes; node++) {
00443     inv_node = n_nodes - node - 1;
00444     /* up */
00445     if (node == this_node) {
00446       /* calculate sums of cells below */
00447       for (c = 1; c < n_layers; c++)
00448         addscale_vec(blwentry(gblcblk, c, e_size), fac, blwentry(gblcblk, c - 1, e_size), blwentry(lclcblk, c - 1, e_size), e_size);
00449 
00450       /* calculate my ghost contribution only if a node above exists */
00451       if (node + 1 < n_nodes) {
00452         addscale_vec(sendbuf, fac, blwentry(gblcblk, n_layers - 1, e_size), blwentry(lclcblk, n_layers - 1, e_size), e_size);
00453         copy_vec(sendbuf + e_size, blwentry(lclcblk, n_layers, e_size), e_size);
00454         MPI_Send(sendbuf, 2*e_size, MPI_DOUBLE, node + 1, 0, comm_cart);
00455       }
00456     }
00457     else if (node + 1 == this_node) {
00458       MPI_Recv(recvbuf, 2*e_size, MPI_DOUBLE, node, 0, comm_cart, &status);
00459       copy_vec(blwentry(gblcblk, 0, e_size), recvbuf, e_size);
00460       copy_vec(blwentry(lclcblk, 0, e_size), recvbuf + e_size, e_size);
00461     }
00462 
00463     /* down */
00464     if (inv_node == this_node) {
00465       /* calculate sums of all cells above */
00466       for (c = n_layers + 1; c > 2; c--)
00467         addscale_vec(abventry(gblcblk, c - 3, e_size), fac, abventry(gblcblk, c - 2, e_size), abventry(lclcblk, c, e_size), e_size);
00468       
00469       /* calculate my ghost contribution only if a node below exists */
00470       if (inv_node -  1 >= 0) {
00471         addscale_vec(sendbuf, fac, abventry(gblcblk, 0, e_size), abventry(lclcblk, 2, e_size), e_size);
00472         copy_vec(sendbuf + e_size, abventry(lclcblk, 1, e_size), e_size);
00473         MPI_Send(sendbuf, 2*e_size, MPI_DOUBLE, inv_node - 1, 0, comm_cart);
00474       }
00475     }
00476     else if (inv_node - 1 == this_node) {
00477       MPI_Recv(recvbuf, 2*e_size, MPI_DOUBLE, inv_node, 0, comm_cart, &status);
00478       copy_vec(abventry(gblcblk, n_layers - 1, e_size), recvbuf, e_size);
00479       copy_vec(abventry(lclcblk, n_layers + 1, e_size), recvbuf + e_size, e_size);
00480     }
00481   }
00482 }
00483 
00484 #ifdef CHECKPOINTS
00485 static void checkpoint(char *text, int p, int q, int e_size)
00486 {
00487   int c, i;
00488   fprintf(stderr, "%d: %s %d %d\n", this_node, text, p, q);
00489 
00490   fprintf(stderr, "partblk\n");
00491   for (c = 0; c < n_localpart; c++) {
00492     fprintf(stderr, "%d", c);    
00493     for (i = 0; i < e_size; i++)
00494       fprintf(stderr, " %10.3g", block(partblk, c, 2*e_size)[i]);
00495     fprintf(stderr, " m");
00496     for (i = 0; i < e_size; i++)
00497       fprintf(stderr, " %10.3g", block(partblk, c, 2*e_size)[i + e_size]);
00498     fprintf(stderr, "\n");
00499   }
00500   fprintf(stderr, "\n");
00501 
00502   fprintf(stderr, "lclcblk\n");
00503   fprintf(stderr, "0");    
00504   for (i = 0; i < e_size; i++)
00505     fprintf(stderr, " %10.3g", block(lclcblk, 0, 2*e_size)[i]);
00506   fprintf(stderr, "\n");
00507   for (c = 1; c <= n_layers; c++) {
00508     fprintf(stderr, "%d", c);    
00509     for (i = 0; i < e_size; i++)
00510       fprintf(stderr, " %10.3g", block(lclcblk, c, 2*e_size)[i]);
00511     fprintf(stderr, " m");
00512     for (i = 0; i < e_size; i++)
00513       fprintf(stderr, " %10.3g", block(lclcblk, c, 2*e_size)[i + e_size]);
00514     fprintf(stderr, "\n");
00515   }
00516   fprintf(stderr, "%d", n_layers + 1);
00517   for (i = 0; i < e_size; i++)
00518     fprintf(stderr, "           ");
00519   fprintf(stderr, " m");
00520 
00521   for (i = 0; i < e_size; i++)
00522     fprintf(stderr, " %10.3g", block(lclcblk, n_layers + 1, 2*e_size)[i + e_size]);
00523   fprintf(stderr, "\n");
00524 
00525   fprintf(stderr, "gblcblk\n");
00526   for (c = 0; c < n_layers; c++) {
00527     fprintf(stderr, "%d", c + 1);    
00528     for (i = 0; i < e_size; i++)
00529       fprintf(stderr, " %10.3g", block(gblcblk, c, 2*e_size)[i]);
00530     fprintf(stderr, " m");
00531     for (i = 0; i < e_size; i++)
00532       fprintf(stderr, " %10.3g", block(gblcblk, c, 2*e_size)[i + e_size]);
00533     fprintf(stderr, "\n");
00534   }
00535   fprintf(stderr, "\n");
00536 }
00537 #else
00538 #define checkpoint(text,p,q,size)
00539 #endif
00540 
00541 /*****************************************************************/
00542 /* 2 pi (sign)(z) */
00543 /*****************************************************************/
00544 
00545 static void setup_z_force()
00546 {
00547   int np, c, i;
00548   double pref = coulomb.prefactor*C_2PI*ux*uy;
00549   Particle *part;
00550   double *lclimgebot=NULL,*lclimgetop=NULL;
00551   int e_size=1,size = 2;
00552   double e, e_di_l, e_di_h;
00553 
00554   double fac_imgsum;
00555 
00556   /* in case of metallic boundary conditions on both sides, we get an infinite array,
00557      which only exists for charge neutral systems. But in this case, we can as well
00558      not sum up the force array, as the net force per image is 0 */
00559   if (mmm2d_params.delta_mult != 1.0) {
00560     fac_imgsum = 1/(1 - mmm2d_params.delta_mult);
00561   }
00562   else {
00563     fac_imgsum = 0;
00564   }
00565 
00566   if (mmm2d_params.dielectric_contrast_on) 
00567     clear_vec(lclimge, size); 
00568 
00569   if(this_node==0) {
00570     
00571     lclimgebot=blwentry(lclcblk,0,e_size);
00572     clear_vec(lclimgebot, e_size);
00573   }
00574   
00575   if(this_node==n_nodes-1) {
00576     
00577     lclimgetop=abventry(lclcblk,n_layers+1,e_size);
00578     clear_vec(lclimgetop, e_size);
00579   }
00580 
00581   /* calculate local cellblks. partblks don't make sense */
00582   for (c = 1; c <= n_layers; c++) {
00583     np   = cells[c].n;
00584     part = cells[c].part;
00585     lclcblk[size*c] = 0;
00586     for (i = 0; i < np; i++) {
00587       lclcblk[size*c] += part[i].p.q;
00588       
00589       if (mmm2d_params.dielectric_contrast_on) {
00590         e_di_l = (mmm2d_params.delta_mult*mmm2d_params.delta_mid_bot
00591                   + mmm2d_params.delta_mult)*fac_imgsum;
00592         if (c==1 && this_node==0) {
00593           e = mmm2d_params.delta_mid_bot;  
00594           lclimgebot[QQEQQP] += part[i].p.q*e;
00595         }
00596         else
00597           e_di_l += mmm2d_params.delta_mid_bot;
00598 
00599         e_di_h = (mmm2d_params.delta_mult*mmm2d_params.delta_mid_top
00600                   + mmm2d_params.delta_mult)*fac_imgsum;
00601 
00602         if (c==n_layers && this_node==n_nodes-1) {
00603           e = mmm2d_params.delta_mid_top;
00604           lclimgetop[QQEQQP] += part[i].p.q*e;
00605         }
00606         else
00607           e_di_h += mmm2d_params.delta_mid_top;
00608 
00609         lclimge[QQEQQP] += part[i].p.q*e_di_l;
00610         lclimge[QQEQQM] += part[i].p.q*e_di_h;
00611       }   
00612     }
00613     lclcblk[size*c] *= pref;
00614     lclcblk[size*c+1] = lclcblk[size*c];   
00615   }
00616 
00617   if (mmm2d_params.dielectric_contrast_on) {
00618     scale_vec(pref, lclimge, size);
00619     if(this_node==0)
00620       scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size);
00621     if(this_node==n_nodes-1)
00622       scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size);
00623   }
00624 }
00625 
00626 static void add_z_force()
00627 {
00628   int np, c, i;
00629   double add;
00630   Particle *part;
00631   double *othcblk;
00632   int size = 2;
00633 
00634   for (c = 1; c <= n_layers; c++) {
00635     othcblk = block(gblcblk, c - 1, size);
00636     add = othcblk[QQEQQP] - othcblk[QQEQQM];
00637     np   = cells[c].n;
00638     part = cells[c].part;
00639     for (i = 0; i < np; i++) {
00640       part[i].f.f[2] += part[i].p.q*add;
00641       LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n",
00642                          this_node, part[i].p.identity, part[i].f.f[0],
00643                          part[i].f.f[1], part[i].f.f[2]));
00644     }
00645   }
00646 }
00647 
00648 static void setup_z_energy()
00649 {
00650   int np, c, i;
00651   double pref = -coulomb.prefactor*C_2PI*ux*uy;
00652   Particle *part;
00653   int e_size = 2, size = 4;
00654 
00655   if (this_node == 0)
00656     /* the lowest lclcblk does not contain anything, since there are no charges below the simulation
00657        box, at least for this term. */
00658     clear_vec(blwentry(lclcblk, 0, e_size), e_size);
00659 
00660   if (this_node == n_nodes - 1)
00661     /* same for the top node */
00662     clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size);
00663 
00664   /* calculate local cellblks. partblks don't make sense */
00665   for (c = 1; c <= n_layers; c++) {
00666     np   = cells[c].n;
00667     part = cells[c].part;
00668     clear_vec(blwentry(lclcblk, c, e_size), e_size);
00669     for (i = 0; i < np; i++) {
00670       lclcblk[size*c + ABEQQP] += part[i].p.q;
00671       lclcblk[size*c + ABEQZP] += part[i].p.q*part[i].r.p[2];
00672     }
00673     scale_vec(pref, blwentry(lclcblk, c, e_size), e_size);
00674     /* just to be able to use the standard distribution. Here below
00675        and above terms are the same */
00676     copy_vec(abventry(lclcblk, c, e_size), blwentry(lclcblk, c, e_size), e_size);
00677   }
00678 }
00679 
00680 static double z_energy()
00681 {
00682   int np, c, i;
00683   Particle *part;
00684   double *othcblk;
00685   int size = 4;
00686   double eng = 0;
00687   for (c = 1; c <= n_layers; c++) {
00688     othcblk = block(gblcblk, c - 1, size);
00689     np   = cells[c].n;
00690     part = cells[c].part;
00691     for (i = 0; i < np; i++) {
00692       eng += part[i].p.q*(part[i].r.p[2]*othcblk[ABEQQP] - othcblk[ABEQZP] -
00693                           part[i].r.p[2]*othcblk[ABEQQM] + othcblk[ABEQZM]);
00694       
00695     }
00696   }
00697   return eng;
00698 }
00699 
00700 /*****************************************************************/
00701 /* PoQ exp sum */
00702 /*****************************************************************/
00703 static void setup_P(int p, double omega, double fac)
00704 {
00705   int np, c, i, ic, o = (p-1)*n_localpart;
00706   Particle *part;
00707   double pref = coulomb.prefactor*4*M_PI*ux*uy*fac*fac;
00708   double h = box_l[2];
00709   double fac_imgsum = 1/(1 - mmm2d_params.delta_mult*exp(-omega*2*h));
00710   double fac_delta_mid_bot = mmm2d_params.delta_mid_bot*fac_imgsum; 
00711   double fac_delta_mid_top = mmm2d_params.delta_mid_top*fac_imgsum;
00712   double fac_delta         = mmm2d_params.delta_mult*fac_imgsum;
00713   double layer_top;
00714   double e, e_di_l, e_di_h;
00715   double *llclcblk;
00716   double *lclimgebot = NULL, *lclimgetop = NULL;
00717   int e_size = 2, size = 4;
00718 
00719   if (mmm2d_params.dielectric_contrast_on)
00720     clear_vec(lclimge, size); 
00721 
00722   if(this_node==0) {
00723     /* on the lowest node, clear the lclcblk below, which only contains the images of the lowest layer
00724        if there is dielectric contrast, otherwise it is empty */
00725     lclimgebot = block(lclcblk, 0, size);
00726     clear_vec(blwentry(lclcblk, 0, e_size), e_size);
00727   }
00728   if(this_node==n_nodes-1) {
00729     /* same for the top node */
00730     lclimgetop = block(lclcblk, n_layers + 1, size);
00731     clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size);
00732   }
00733 
00734   layer_top = my_left[2] + layer_h;
00735   ic = 0;
00736   for (c = 1; c <= n_layers; c++) {
00737     np   = cells[c].n;
00738     part = cells[c].part;
00739     llclcblk = block(lclcblk, c, size);
00740 
00741     clear_vec(llclcblk, size);
00742 
00743     for (i = 0; i < np; i++) {
00744       e = exp(omega*(part[i].r.p[2] - layer_top));
00745 
00746       partblk[size*ic + POQESM] = part[i].p.q*scxcache[o + ic].s/e;
00747       partblk[size*ic + POQESP] = part[i].p.q*scxcache[o + ic].s*e;
00748       partblk[size*ic + POQECM] = part[i].p.q*scxcache[o + ic].c/e;
00749       partblk[size*ic + POQECP] = part[i].p.q*scxcache[o + ic].c*e;
00750 
00751       /* take images due to different dielectric constants into account */
00752       if (mmm2d_params.dielectric_contrast_on) {
00753         if (c==1 && this_node==0) {
00754           /* There are image charges at -(2h+z) and -(2h-z) etc. layer_h included due to the shift
00755              in z */
00756           e_di_l = ( exp(omega*(-part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_bot +
00757                      exp(omega*( part[i].r.p[2] - 2*h + layer_h))                   )*fac_delta;
00758 
00759           e = exp(omega*(-part[i].r.p[2]))*mmm2d_params.delta_mid_bot;
00760 
00761           lclimgebot[POQESP] += part[i].p.q*scxcache[o + ic].s*e;
00762           lclimgebot[POQECP] += part[i].p.q*scxcache[o + ic].c*e;
00763         }
00764         else
00765           /* There are image charges at -(z) and -(2h-z) etc. layer_h included due to the shift in z */
00766           e_di_l = ( exp(omega*(-part[i].r.p[2]       + layer_h)) +
00767                      exp(omega*( part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_top )*fac_delta_mid_bot;    
00768 
00769         if (c==n_layers && this_node==n_nodes-1) {
00770           /* There are image charges at (3h-z) and (h+z) from the top layer etc. layer_h included
00771              due to the shift in z */
00772           e_di_h = ( exp(omega*( part[i].r.p[2] - 3*h + 2*layer_h))*mmm2d_params.delta_mid_top +
00773                      exp(omega*(-part[i].r.p[2] -   h + 2*layer_h))                 )*fac_delta;
00774           
00775           /* There are image charges at (h-z) layer_h included due to the shift in z */
00776           e = exp(omega*(part[i].r.p[2] - h + layer_h))*mmm2d_params.delta_mid_top;
00777           
00778           lclimgetop[POQESM]+= part[i].p.q*scxcache[o + ic].s*e;
00779           lclimgetop[POQECM]+= part[i].p.q*scxcache[o + ic].c*e;
00780         }
00781         else
00782           /* There are image charges at (h-z) and (h+z) from the top layer etc. layer_h included due
00783              to the shift in z */
00784           e_di_h = ( exp(omega*( part[i].r.p[2] - h + 2*layer_h)) +
00785                      exp(omega*(-part[i].r.p[2] - h + 2*layer_h))*mmm2d_params.delta_mid_bot )*fac_delta_mid_top;
00786 
00787         lclimge[POQESP] += part[i].p.q*scxcache[o + ic].s*e_di_l;
00788         lclimge[POQECP] += part[i].p.q*scxcache[o + ic].c*e_di_l;
00789         lclimge[POQESM] += part[i].p.q*scxcache[o + ic].s*e_di_h;
00790         lclimge[POQECM] += part[i].p.q*scxcache[o + ic].c*e_di_h;
00791       }
00792 
00793       add_vec(llclcblk, llclcblk, block(partblk, ic, size), size);
00794       ic++;
00795     }
00796     scale_vec(pref, blwentry(lclcblk, c, e_size), e_size);
00797     scale_vec(pref, abventry(lclcblk, c, e_size), e_size);
00798 
00799     layer_top += layer_h;
00800   }
00801 
00802   if (mmm2d_params.dielectric_contrast_on) {
00803     scale_vec(pref, lclimge, size);
00804     if(this_node==0)
00805       scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size);
00806     if(this_node==n_nodes-1)
00807       scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size);
00808   }
00809 }
00810 
00811 /* compare setup_P */
00812 static void setup_Q(int q, double omega, double fac)
00813 {
00814   int np, c, i, ic, o = (q-1)*n_localpart;
00815   Particle *part;
00816   double pref = coulomb.prefactor*4*M_PI*ux*uy*fac*fac;
00817   double h = box_l[2];
00818   double fac_imgsum = 1/(1 - mmm2d_params.delta_mult*exp(-omega*2*h));
00819   double fac_delta_mid_bot = mmm2d_params.delta_mid_bot*fac_imgsum; 
00820   double fac_delta_mid_top = mmm2d_params.delta_mid_top*fac_imgsum;
00821   double fac_delta         = mmm2d_params.delta_mult*fac_imgsum;
00822   double layer_top;
00823   double e, e_di_l, e_di_h;
00824   double *llclcblk;
00825   double *lclimgebot=NULL, *lclimgetop=NULL;
00826   int e_size = 2, size = 4;
00827  
00828   if (mmm2d_params.dielectric_contrast_on)
00829     clear_vec(lclimge, size); 
00830   
00831   if(this_node==0) {
00832     lclimgebot = block(lclcblk, 0, size);
00833     clear_vec(blwentry(lclcblk, 0, e_size), e_size);
00834   }
00835   
00836   if(this_node==n_nodes-1) {
00837     lclimgetop = block(lclcblk, n_layers + 1, size);
00838     clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size);
00839   }
00840   
00841   layer_top = my_left[2] + layer_h;
00842   ic = 0;
00843   for (c = 1; c <= n_layers; c++) {
00844     np   = cells[c].n;
00845     part = cells[c].part;
00846     llclcblk = block(lclcblk, c, size);
00847 
00848     clear_vec(llclcblk, size);
00849 
00850     for (i = 0; i < np; i++) {
00851       e = exp(omega*(part[i].r.p[2] - layer_top));
00852 
00853       partblk[size*ic + POQESM] = part[i].p.q*scycache[o + ic].s/e;
00854       partblk[size*ic + POQESP] = part[i].p.q*scycache[o + ic].s*e;
00855       partblk[size*ic + POQECM] = part[i].p.q*scycache[o + ic].c/e;
00856       partblk[size*ic + POQECP] = part[i].p.q*scycache[o + ic].c*e;
00857 
00858       if (mmm2d_params.dielectric_contrast_on) {
00859         if(c==1 && this_node==0) {
00860           e_di_l = ( exp(omega*(-part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_bot +
00861                      exp(omega*( part[i].r.p[2] - 2*h + layer_h))                 )*fac_delta;
00862         
00863           e = exp(omega*(-part[i].r.p[2]))*mmm2d_params.delta_mid_bot;
00864 
00865           lclimgebot[POQESP] += part[i].p.q*scycache[o + ic].s*e;
00866           lclimgebot[POQECP] += part[i].p.q*scycache[o + ic].c*e;
00867         }
00868         else
00869           e_di_l = ( exp(omega*(-part[i].r.p[2]       + layer_h)) +
00870                      exp(omega*( part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_top )*fac_delta_mid_bot;    
00871 
00872         if(c==n_layers && this_node==n_nodes-1) {
00873           e_di_h = (exp(omega*( part[i].r.p[2] - 3*h + 2*layer_h))*mmm2d_params.delta_mid_top +
00874                     exp(omega*(-part[i].r.p[2] -   h + 2*layer_h))                 )*fac_delta;
00875                   
00876           e = exp(omega*(part[i].r.p[2] - h + layer_h))*mmm2d_params.delta_mid_top;
00877           
00878           lclimgetop[POQESM] += part[i].p.q*scycache[o + ic].s*e;
00879           lclimgetop[POQECM] += part[i].p.q*scycache[o + ic].c*e;
00880         }
00881         else
00882           e_di_h = ( exp(omega*( part[i].r.p[2] - h + 2*layer_h)) +
00883                      exp(omega*(-part[i].r.p[2] - h + 2*layer_h))*mmm2d_params.delta_mid_bot )*fac_delta_mid_top;
00884 
00885         lclimge[POQESP] += part[i].p.q*scycache[o + ic].s*e_di_l;
00886         lclimge[POQECP] += part[i].p.q*scycache[o + ic].c*e_di_l;
00887         lclimge[POQESM] += part[i].p.q*scycache[o + ic].s*e_di_h;
00888         lclimge[POQECM] += part[i].p.q*scycache[o + ic].c*e_di_h;
00889       }
00890       
00891       add_vec(llclcblk, llclcblk, block(partblk, ic, size), size);
00892       ic++;
00893     }
00894     scale_vec(pref, blwentry(lclcblk, c, e_size), e_size);
00895     scale_vec(pref, abventry(lclcblk, c, e_size), e_size);
00896 
00897     layer_top += layer_h;
00898   }
00899 
00900   if (mmm2d_params.dielectric_contrast_on) {
00901     scale_vec(pref, lclimge, size);
00902     if(this_node==0)
00903       scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size);
00904     if(this_node==n_nodes-1)
00905       scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size);
00906   }
00907 }
00908 
00909 static void add_P_force()
00910 {
00911   int np, c, i, ic;
00912   Particle *part;
00913   double *othcblk;
00914   int size = 4;
00915 
00916   ic = 0;
00917   for (c = 1; c <= n_layers; c++) {
00918     np   = cells[c].n;
00919     part = cells[c].part;
00920     othcblk = block(gblcblk, c - 1, size);
00921 
00922     for (i = 0; i < np; i++) {
00923       part[i].f.f[0] +=
00924         partblk[size*ic + POQESM]*othcblk[POQECP] - partblk[size*ic + POQECM]*othcblk[POQESP] +
00925         partblk[size*ic + POQESP]*othcblk[POQECM] - partblk[size*ic + POQECP]*othcblk[POQESM];
00926       part[i].f.f[2] +=
00927         partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] -
00928         partblk[size*ic + POQECP]*othcblk[POQECM] - partblk[size*ic + POQESP]*othcblk[POQESM];
00929 
00930       LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n",
00931                          this_node, part[i].p.identity, part[i].f.f[0],
00932                          part[i].f.f[1], part[i].f.f[2]));
00933       ic++;
00934     }
00935   }
00936 }
00937 
00938 static double P_energy(double omega)
00939 {
00940   int np, c, i, ic;
00941   double *othcblk;
00942   int size = 4;
00943   double eng = 0;
00944   double pref = 1/omega;
00945 
00946   ic = 0;
00947   for (c = 1; c <= n_layers; c++) {
00948     np   = cells[c].n;
00949     othcblk = block(gblcblk, c - 1, size);
00950     for (i = 0; i < np; i++) {
00951       eng += pref*(partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] +
00952                    partblk[size*ic + POQECP]*othcblk[POQECM] + partblk[size*ic + POQESP]*othcblk[POQESM]);
00953       ic++;
00954     }
00955   }
00956   return eng;
00957 }
00958 
00959 static void add_Q_force()
00960 {
00961   int np, c, i, ic;
00962   Particle *part;
00963   double *othcblk;
00964   int size = 4;
00965 
00966   ic = 0;
00967   for (c = 1; c <= n_layers; c++) {
00968     np   = cells[c].n;
00969     part = cells[c].part;
00970     othcblk = block(gblcblk, c - 1, size);
00971 
00972     for (i = 0; i < np; i++) {
00973       part[i].f.f[1] +=
00974         partblk[size*ic + POQESM]*othcblk[POQECP] - partblk[size*ic + POQECM]*othcblk[POQESP] +
00975         partblk[size*ic + POQESP]*othcblk[POQECM] - partblk[size*ic + POQECP]*othcblk[POQESM];
00976       part[i].f.f[2] +=
00977         partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] -
00978         partblk[size*ic + POQECP]*othcblk[POQECM] - partblk[size*ic + POQESP]*othcblk[POQESM];
00979 
00980       LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n",
00981                          this_node, part[i].p.identity, part[i].f.f[0],
00982                          part[i].f.f[1], part[i].f.f[2]));
00983       ic++;
00984     }
00985   }
00986 }
00987 
00988 static double Q_energy(double omega)
00989 {
00990   int np, c, i, ic;
00991   double *othcblk;
00992   int size = 4;
00993   double eng = 0;
00994   double pref = 1/omega;
00995 
00996   ic = 0;
00997   for (c = 1; c <= n_layers; c++) {
00998     np   = cells[c].n;
00999     othcblk = block(gblcblk, c - 1, size);
01000 
01001     for (i = 0; i < np; i++) {
01002       eng += pref*(partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] +
01003                    partblk[size*ic + POQECP]*othcblk[POQECM] + partblk[size*ic + POQESP]*othcblk[POQESM]);
01004       ic++;
01005     }
01006   }
01007   return eng;
01008 }
01009 
01010 /*****************************************************************/
01011 /* PQ particle blocks */
01012 /*****************************************************************/
01013 
01014 /* compare setup_P */
01015 static void setup_PQ(int p, int q, double omega, double fac)
01016 {
01017   int np, c, i, ic, ox = (p - 1)*n_localpart, oy = (q - 1)*n_localpart;
01018   Particle *part;
01019   double pref = coulomb.prefactor*8*M_PI*ux*uy*fac*fac;
01020   double h = box_l[2];
01021   double fac_imgsum = 1/(1 - mmm2d_params.delta_mult*exp(-omega*2*h));
01022   double fac_delta_mid_bot = mmm2d_params.delta_mid_bot*fac_imgsum; 
01023   double fac_delta_mid_top = mmm2d_params.delta_mid_top*fac_imgsum;
01024   double fac_delta         = mmm2d_params.delta_mult*fac_imgsum;
01025   double layer_top;
01026   double e, e_di_l, e_di_h;
01027   double *llclcblk;
01028   double *lclimgebot=NULL, *lclimgetop=NULL;
01029   int e_size = 4, size = 8;
01030 
01031   if (mmm2d_params.dielectric_contrast_on)
01032     clear_vec(lclimge, size); 
01033 
01034   if(this_node==0) {
01035     lclimgebot = block(lclcblk, 0, size);
01036     clear_vec(blwentry(lclcblk, 0, e_size), e_size);
01037   }
01038   
01039   if(this_node==n_nodes-1) {
01040     lclimgetop = block(lclcblk, n_layers + 1, size);
01041     clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size);
01042   }
01043   
01044   layer_top = my_left[2] + layer_h;
01045   ic = 0;
01046   for (c = 1; c <= n_layers; c++) {
01047     np   = cells[c].n;
01048     part = cells[c].part;
01049     llclcblk = block(lclcblk, c, size);
01050 
01051     clear_vec(llclcblk, size);
01052 
01053     for (i = 0; i < np; i++) {
01054       e = exp(omega*(part[i].r.p[2] - layer_top));
01055       
01056       partblk[size*ic + PQESSM] = scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q/e;
01057       partblk[size*ic + PQESCM] = scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q/e;
01058       partblk[size*ic + PQECSM] = scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q/e;
01059       partblk[size*ic + PQECCM] = scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q/e;
01060 
01061       partblk[size*ic + PQESSP] = scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e;
01062       partblk[size*ic + PQESCP] = scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e;
01063       partblk[size*ic + PQECSP] = scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e;
01064       partblk[size*ic + PQECCP] = scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e;
01065 
01066       if (mmm2d_params.dielectric_contrast_on) {
01067         if(c==1 && this_node==0) {      
01068           e_di_l = (exp(omega*(-part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_bot +
01069                     exp(omega*( part[i].r.p[2] - 2*h + layer_h))                 )*fac_delta;
01070 
01071           e = exp(omega*(-part[i].r.p[2]))*mmm2d_params.delta_mid_bot;
01072         
01073           lclimgebot[PQESSP] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e;
01074           lclimgebot[PQESCP] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e;
01075           lclimgebot[PQECSP] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e;
01076           lclimgebot[PQECCP] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e;
01077         }
01078         else    
01079           e_di_l = ( exp(omega*(-part[i].r.p[2]       + layer_h)) +
01080                      exp(omega*( part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_top )*fac_delta_mid_bot;     
01081         
01082         if(c==n_layers && this_node==n_nodes-1) {
01083           e_di_h = ( exp(omega*( part[i].r.p[2] - 3*h + 2*layer_h))*mmm2d_params.delta_mid_top +
01084                      exp(omega*(-part[i].r.p[2] -   h + 2*layer_h))                 )*fac_delta;
01085           
01086           e = exp(omega*(part[i].r.p[2]-h+layer_h))*mmm2d_params.delta_mid_top;
01087         
01088           lclimgetop[PQESSM] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e;
01089           lclimgetop[PQESCM] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e;
01090           lclimgetop[PQECSM] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e;
01091           lclimgetop[PQECCM] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e;
01092         }
01093         else
01094           e_di_h = ( exp(omega*( part[i].r.p[2] - h + 2*layer_h)) +
01095                      exp(omega*(-part[i].r.p[2] - h + 2*layer_h))*mmm2d_params.delta_mid_bot )*fac_delta_mid_top;  
01096       
01097         lclimge[PQESSP] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e_di_l;
01098         lclimge[PQESCP] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e_di_l;
01099         lclimge[PQECSP] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e_di_l;
01100         lclimge[PQECCP] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e_di_l;
01101 
01102         lclimge[PQESSM] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e_di_h;
01103         lclimge[PQESCM] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e_di_h;
01104         lclimge[PQECSM] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e_di_h;
01105         lclimge[PQECCM] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e_di_h;
01106       }
01107       
01108       add_vec(llclcblk, llclcblk, block(partblk, ic, size), size);
01109       ic++;
01110     }
01111     scale_vec(pref, blwentry(lclcblk, c, e_size), e_size);
01112     scale_vec(pref, abventry(lclcblk, c, e_size), e_size);
01113     
01114     layer_top += layer_h;
01115   }
01116 
01117   if (mmm2d_params.dielectric_contrast_on) {
01118     scale_vec(pref, lclimge, size);
01119 
01120     if(this_node==0)
01121       scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size);
01122     if(this_node==n_nodes-1)
01123       scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size);
01124   }
01125 }
01126 
01127 static void add_PQ_force(int p, int q, double omega)
01128 {
01129   int np, c, i, ic;
01130   Particle *part;
01131   double pref_x = C_2PI*ux*p/omega;
01132   double pref_y = C_2PI*uy*q/omega;
01133   double *othcblk;
01134   int size = 8;
01135 
01136   ic = 0;
01137   for (c = 1; c <= n_layers; c++) {
01138     np   = cells[c].n;
01139     part = cells[c].part;
01140     othcblk = block(gblcblk, c - 1, size);
01141 
01142     for (i = 0; i < np; i++) {
01143       part[i].f.f[0] +=
01144         pref_x*(partblk[size*ic + PQESCM]*othcblk[PQECCP] + partblk[size*ic + PQESSM]*othcblk[PQECSP] -
01145                 partblk[size*ic + PQECCM]*othcblk[PQESCP] - partblk[size*ic + PQECSM]*othcblk[PQESSP] +
01146                 partblk[size*ic + PQESCP]*othcblk[PQECCM] + partblk[size*ic + PQESSP]*othcblk[PQECSM] -
01147                 partblk[size*ic + PQECCP]*othcblk[PQESCM] - partblk[size*ic + PQECSP]*othcblk[PQESSM]);
01148       part[i].f.f[1] +=
01149         pref_y*(partblk[size*ic + PQECSM]*othcblk[PQECCP] + partblk[size*ic + PQESSM]*othcblk[PQESCP] -
01150                 partblk[size*ic + PQECCM]*othcblk[PQECSP] - partblk[size*ic + PQESCM]*othcblk[PQESSP] +
01151                 partblk[size*ic + PQECSP]*othcblk[PQECCM] + partblk[size*ic + PQESSP]*othcblk[PQESCM] -
01152                 partblk[size*ic + PQECCP]*othcblk[PQECSM] - partblk[size*ic + PQESCP]*othcblk[PQESSM]);
01153       part[i].f.f[2] +=
01154                (partblk[size*ic + PQECCM]*othcblk[PQECCP] + partblk[size*ic + PQECSM]*othcblk[PQECSP] +
01155                 partblk[size*ic + PQESCM]*othcblk[PQESCP] + partblk[size*ic + PQESSM]*othcblk[PQESSP] -
01156                 partblk[size*ic + PQECCP]*othcblk[PQECCM] - partblk[size*ic + PQECSP]*othcblk[PQECSM] -
01157                 partblk[size*ic + PQESCP]*othcblk[PQESCM] - partblk[size*ic + PQESSP]*othcblk[PQESSM]);
01158 
01159       LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n",
01160                          this_node, part[i].p.identity, part[i].f.f[0],
01161                          part[i].f.f[1], part[i].f.f[2]));
01162       ic++;
01163     }
01164   }
01165 }
01166 
01167 static double PQ_energy(double omega)
01168 {
01169   int size = 8;
01170   double eng = 0;
01171   double pref = 1/omega;
01172 
01173   int ic = 0;
01174   for (int c = 1; c <= n_layers; c++) {
01175     int np   = cells[c].n;
01176     double *othcblk = block(gblcblk, c - 1, size);
01177 
01178     for (int i = 0; i < np; i++) {
01179       eng += pref*(partblk[size*ic + PQECCM]*othcblk[PQECCP] + partblk[size*ic + PQECSM]*othcblk[PQECSP] +
01180                    partblk[size*ic + PQESCM]*othcblk[PQESCP] + partblk[size*ic + PQESSM]*othcblk[PQESSP] +
01181                    partblk[size*ic + PQECCP]*othcblk[PQECCM] + partblk[size*ic + PQECSP]*othcblk[PQECSM] +
01182                    partblk[size*ic + PQESCP]*othcblk[PQESCM] + partblk[size*ic + PQESSP]*othcblk[PQESSM]);
01183       ic++;
01184     }
01185   }
01186   return eng;
01187 }
01188 
01189 /*****************************************************************/
01190 /* main loops */
01191 /*****************************************************************/
01192 
01193 static void add_force_contribution(int p, int q)
01194 {
01195   double omega, fac;
01196   
01197   if (q == 0) {
01198     if (p == 0) {
01199       setup_z_force();
01200 
01201       if (mmm2d_params.dielectric_contrast_on)
01202         gather_image_contributions(1);
01203       else
01204         clear_image_contributions(1);
01205 
01206       distribute(1, 1.);
01207       add_z_force();
01208       checkpoint("************2piz", 0, 0, 1);
01209     }
01210     else {
01211       omega = C_2PI*ux*p;
01212       fac = exp(-omega*layer_h);
01213       setup_P(p, omega, fac);
01214       if (mmm2d_params.dielectric_contrast_on)
01215         gather_image_contributions(2);
01216       else
01217         clear_image_contributions(2);
01218       distribute(2, fac); 
01219       add_P_force();
01220       checkpoint("************distri p", p, 0, 2);
01221     }
01222   }
01223   else if (p == 0) {
01224     omega = C_2PI*uy*q;
01225     fac = exp(-omega*layer_h);
01226     setup_Q(q, omega, fac);
01227     if (mmm2d_params.dielectric_contrast_on)
01228       gather_image_contributions(2);
01229     else
01230       clear_image_contributions(2);
01231     distribute(2, fac);
01232     add_Q_force();
01233     checkpoint("************distri q", 0, q, 2);
01234   }
01235   else {
01236     omega = C_2PI*sqrt(SQR(ux*p) + SQR(uy*q));
01237     fac = exp(-omega*layer_h);
01238     setup_PQ(p, q, omega, fac);
01239     if (mmm2d_params.dielectric_contrast_on)
01240       gather_image_contributions(4);
01241     else
01242       clear_image_contributions(4);
01243     distribute(4, fac);
01244     add_PQ_force(p, q, omega);
01245     checkpoint("************distri pq", p, q, 4);
01246   }
01247 }
01248 
01249 static double energy_contribution(int p, int q)
01250 {
01251   double eng;
01252   double omega, fac;
01253   
01254   if (q == 0) {
01255     if (p == 0) {
01256       setup_z_energy();
01257       clear_image_contributions(2);
01258       distribute(2, 1.);
01259       eng = z_energy();
01260       checkpoint("E************2piz", 0, 0, 2);
01261     }
01262     else {
01263       omega = C_2PI*ux*p;
01264       fac = exp(-omega*layer_h);
01265       setup_P(p, omega, fac);
01266       if (mmm2d_params.dielectric_contrast_on)
01267         gather_image_contributions(2);
01268       else
01269         clear_image_contributions(2);
01270       distribute(2, fac);
01271       eng = P_energy(omega);
01272       checkpoint("************distri p", p, 0, 2);
01273     }
01274   }
01275   else if (p == 0) {
01276     omega = C_2PI*uy*q;
01277     fac = exp(-omega*layer_h);
01278     setup_Q(q, omega, fac);
01279     if (mmm2d_params.dielectric_contrast_on)
01280       gather_image_contributions(2);
01281     else
01282       clear_image_contributions(2);
01283     distribute(2, fac);
01284     eng = Q_energy(omega);
01285     checkpoint("************distri q", 0, q, 2);
01286   }
01287   else {
01288     omega = C_2PI*sqrt(SQR(ux*p) + SQR(uy*q));
01289     fac = exp(-omega*layer_h);
01290     setup_PQ(p, q, omega, fac);
01291     if (mmm2d_params.dielectric_contrast_on)
01292       gather_image_contributions(4);
01293     else
01294       clear_image_contributions(4);
01295     distribute(4, fac);
01296     eng = PQ_energy(omega);
01297     checkpoint("************distri pq", p, q, 4);
01298   }
01299   return eng;
01300 }
01301 
01302 double MMM2D_add_far(int f, int e)
01303 {
01304   double eng;
01305   int p, q;
01306   double R, dR, q2;
01307   int *undone;
01308   
01309   // It's not really far...
01310   eng = e ? self_energy : 0;
01311 
01312   if (mmm2d_params.far_cut == 0.0)
01313     return 0.5*eng;
01314   
01315   undone = malloc((n_scxcache + 1)*sizeof(int));
01316 
01317   prepare_scx_cache();
01318   prepare_scy_cache();
01319 
01320   /* complicated loop. We work through the p,q vectors in rings
01321      from outside to inside to avoid problems with cancellation */
01322 
01323   /* up to which q vector we have to work */
01324   for (p = 0; p <= n_scxcache; p++) {
01325     if (p == 0)
01326       q =  n_scycache;
01327     else {
01328       q2 = mmm2d_params.far_cut2 - SQR(ux*(p - 1));
01329       if (q2 > 0)
01330         q = 1 + box_l[1]*(int)ceil(sqrt(q2));
01331       else
01332         q = 1;
01333       /* just to be on the safe side... */
01334       if (q > n_scycache) q = n_scycache;
01335     }
01336     undone[p] = q;
01337   }
01338 
01339   dR = -log(FARRELPREC)/C_2PI*uz;
01340   
01341   for(R = mmm2d_params.far_cut; R > 0; R -= dR) {
01342     for (p = n_scxcache; p >= 0; p--) {
01343       for (q = undone[p]; q >= 0; q--) {
01344         if (ux2*SQR(p)  + uy2*SQR(q) < SQR(R))
01345           break;
01346         if (f)
01347           add_force_contribution(p, q);
01348         if (e)
01349           eng += energy_contribution(p, q);
01350       }
01351       undone[p] = q;
01352     }
01353   }
01354   // printf("yyyy\n");
01355   /* clean up left overs */
01356   for (p = n_scxcache; p >= 0; p--) {
01357     q = undone[p];
01358     // fprintf(stderr, "left over %d\n", q);
01359     for (; q >= 0; q--) {
01360       // printf("xxxxx %d %d\n", p, q);
01361       if (f)
01362         add_force_contribution(p, q);
01363       if (e)
01364         eng += energy_contribution(p, q);
01365      }
01366   }
01367   
01368   free(undone);
01369   
01370   return 0.5*eng;
01371 }
01372 
01373 static int MMM2D_tune_far(double error)
01374 {
01375   double err;
01376   double min_inv_boxl = dmin(ux, uy);
01377   mmm2d_params.far_cut = min_inv_boxl;
01378   do {
01379     err = exp(-2*M_PI*mmm2d_params.far_cut*min_far)/min_far*
01380       (C_2PI*mmm2d_params.far_cut + 2*(ux + uy) + 1/min_far);
01381     // fprintf(stderr, "far tuning: %f -> %f, %f\n", mmm2d_params.far_cut, err, min_far);
01382     mmm2d_params.far_cut += min_inv_boxl;
01383   }
01384   while (err > error && mmm2d_params.far_cut*layer_h < MAXIMAL_FAR_CUT);
01385   if (mmm2d_params.far_cut*layer_h >= MAXIMAL_FAR_CUT)
01386     return ERROR_FARC;
01387   // fprintf(stderr, "far cutoff %g %g %g\n", mmm2d_params.far_cut, err, min_far);
01388   mmm2d_params.far_cut -= min_inv_boxl;
01389   mmm2d_params.far_cut2 = SQR(mmm2d_params.far_cut);
01390   return 0;
01391 }
01392 
01393 /****************************************
01394  * NEAR FORMULA
01395  ****************************************/
01396 
01397 static int MMM2D_tune_near(double error)
01398 {
01399   int P, n, i;
01400   double uxrho2m2max, uxrhomax2;
01401   int p;
01402   double T, pref, err, exponent;
01403   double L, sum;
01404 
01405   /* yes, it's y only... */
01406   if (max_near > box_l[1]/2)
01407     return ERROR_LARGE;
01408   if (min_far < 0)
01409     return ERROR_SMALL;
01410   if (ux*box_l[1] >= 3/M_SQRT2 )
01411     return ERROR_BOXL;
01412 
01413   /* error is split into three parts:
01414      one part for bessel, one for complex
01415      and one for polygamma cutoff */
01416   part_error = error/3;
01417 
01418   /* Bessel sum, determine cutoff */
01419   P = 2;
01420   exponent = M_PI*ux*box_l[1];
01421   T  = exp(exponent)/exponent;
01422   pref = 8*ux*dmax(C_2PI*ux, 1);
01423   do {
01424     L = M_PI*ux*(P - 1);
01425     sum = 0;
01426     for (p = 1; p <= P; p++)
01427       sum += p*exp(-exponent*p);
01428     err = pref*K1(box_l[1]*L)*(T*((L + uy)/M_PI*box_l[0] - 1) + sum);
01429     P++;
01430   }
01431   while (err > part_error && (P - 1) < MAXIMAL_B_CUT);
01432   P--;
01433   if (P == MAXIMAL_B_CUT)
01434     return ERROR_BESSEL;
01435 
01436   // fprintf(stderr, "bessel cutoff %d %g\n", P, err);
01437 
01438   realloc_intlist(&besselCutoff, besselCutoff.n = P);
01439   for (p = 1; p < P; p++)
01440     besselCutoff.e[p-1] = (int)floor(((double)P)/(2*p)) + 1;
01441 
01442   /* complex sum, determine cutoffs (dist dependent) */
01443   T = log(part_error/(16*M_SQRT2)*box_l[0]*box_l[1]);
01444   // for 0, the sum is exactly zero, so do not calculate anything
01445   complexCutoff[0] = 0;
01446   for (i = 1; i <= COMPLEX_STEP; i++)
01447     complexCutoff[i] = (int)ceil(T/log(i/COMPLEX_FAC));
01448   prepareBernoulliNumbers(complexCutoff[COMPLEX_STEP]);
01449 
01450   /* polygamma, determine order */
01451   n = 1;
01452   uxrhomax2 = SQR(ux*box_l[1])/2;
01453   uxrho2m2max = 1.0;
01454   do {
01455     create_mod_psi_up_to(n+1);
01456 
01457     err = 2*n*fabs(mod_psi_even(n, 0.5))*uxrho2m2max;
01458     uxrho2m2max *= uxrhomax2;
01459     n++;
01460   }
01461   while (err > 0.1*part_error && n < MAXIMAL_POLYGAMMA);
01462   if (n == MAXIMAL_POLYGAMMA)
01463     return ERROR_POLY;
01464   // fprintf(stderr, "polygamma cutoff %d %g\n", n, err);
01465 
01466   return 0;
01467 }
01468 
01469 static void prepareBernoulliNumbers(int bon_order)
01470 {
01471   int l;
01472   /* BernoulliB[2 n]/(2 n)!(2 Pi)^(2n) up to order 33 */
01473   static double bon_table[34] = {
01474      1.0000000000000000000, 3.2898681336964528729,
01475     -2.1646464674222763830, 2.0346861239688982794,
01476     -2.0081547123958886788, 2.0019891502556361707,
01477     -2.0004921731066160966, 2.0001224962701174097,
01478     -2.0000305645188173037, 2.0000076345865299997,
01479     -2.0000019079240677456, 2.0000004769010054555,
01480     -2.0000001192163781025, 2.0000000298031096567,
01481     -2.0000000074506680496, 2.0000000018626548648,
01482     -2.0000000004656623667, 2.0000000001164154418,
01483     -2.0000000000291038438, 2.0000000000072759591,
01484     -2.0000000000018189896, 2.0000000000004547474,
01485     -2.0000000000001136868, 2.0000000000000284217,
01486     -2.0000000000000071054, 2.0000000000000017764,
01487     -2.0000000000000004441, 2.0000000000000001110,
01488     -2.0000000000000000278, 2.0000000000000000069,
01489     -2.0000000000000000017, 2.0000000000000000004,
01490     -2.0000000000000000001, 2.0000000000000000000
01491   };
01492 
01493   if (bon_order < 2)
01494     bon_order = 2;
01495 
01496   realloc_doublelist(&bon, bon.n = bon_order);
01497 
01498   /* the ux is multiplied in to bessel, complex and psi at once, not here,
01499      and we use uy*(z + iy), so the uy is also treated below */
01500   for(l = 1; (l <= bon_order) && (l < 34); l++)
01501     bon.e[l-1] = 2*uy*bon_table[l];
01502 
01503   for (; l <= bon_order; l++) {
01504     if (l & 1)
01505       bon.e[l-1] =  4.0*uy;
01506     else
01507       bon.e[l-1] = -4.0*uy;      
01508   }
01509 }
01510 
01511 void add_mmm2d_coulomb_pair_force(double charge_factor,
01512                                   double d[3], double dl2, double dl, double force[3])
01513 {
01514   double F[3];
01515   double pref = coulomb.prefactor*charge_factor;
01516   double z2   = d[2]*d[2];
01517   double rho2 = d[1]*d[1] + z2;
01518   int i;
01519 
01520 #ifdef ADDITIONAL_CHECKS
01521   if (d[2] >box_l[1]/2) {
01522     char *errtxt = runtime_error(128);
01523     ERROR_SPRINTF(errtxt, "{024 near formula called for too distant particle pair} ");
01524     return;
01525   }
01526 #endif
01527 
01528   F[0] = F[1] = F[2] = 0;
01529 
01530   /* Bessel sum */
01531   {
01532     int p, l;
01533     double k0, k1;
01534     double k0Sum, k1ySum, k1Sum;
01535     double freq;
01536     double rho_l, ypl;
01537     double c, s;
01538 
01539     for (p = 1; p < besselCutoff.n; p++) {
01540       k0Sum  = 0;
01541       k1ySum = 0;
01542       k1Sum  = 0;
01543 
01544       freq = C_2PI*ux*p;
01545 
01546       for (l = 1; l < besselCutoff.e[p-1]; l++) {
01547         ypl   = d[1] + l*box_l[1];
01548         rho_l = sqrt(ypl*ypl + z2);
01549 #ifdef BESSEL_MACHINE_PREC
01550         k0 = K0(freq*rho_l);
01551         k1 = K1(freq*rho_l);
01552 #else
01553         LPK01(freq*rho_l, &k0, &k1);
01554 #endif
01555         k1 /= rho_l;
01556         k0Sum  += k0;
01557         k1Sum  += k1;
01558         k1ySum += k1*ypl;
01559 
01560         ypl   = d[1] - l*box_l[1];
01561         rho_l = sqrt(ypl*ypl + z2);
01562 #ifdef BESSEL_MACHINE_PREC
01563         k0 = K0(freq*rho_l);
01564         k1 = K1(freq*rho_l);
01565 #else
01566         LPK01(freq*rho_l, &k0, &k1);
01567 #endif
01568         k1 /= rho_l;
01569         k0Sum  += k0;
01570         k1Sum  += k1;
01571         k1ySum += k1*ypl;
01572       }
01573 
01574       /* the ux is multiplied in to bessel, complex and psi at once, not here */
01575       c = 4*freq*cos(freq*d[0]);
01576       s = 4*freq*sin(freq*d[0]);
01577       F[0] +=      s*k0Sum;
01578       F[1] +=      c*k1ySum;
01579       F[2] += d[2]*c*k1Sum;
01580     }
01581     // fprintf(stderr, " bessel force %f %f %f\n", F[0], F[1], F[2]);
01582   }
01583 
01584   /* complex sum */
01585   {
01586     double zeta_r, zeta_i;
01587     double zet2_r, zet2_i;
01588     double ztn_r,  ztn_i;
01589     double tmp_r;
01590     int end, n;
01591 
01592     ztn_r = zeta_r = uy*d[2];
01593     ztn_i = zeta_i = uy*d[1];
01594     zet2_r = zeta_r*zeta_r - zeta_i*zeta_i;
01595     zet2_i = 2*zeta_r*zeta_i;
01596 
01597     end = (int)ceil(COMPLEX_FAC*uy2*rho2);
01598     if (end > COMPLEX_STEP) {
01599       end = COMPLEX_STEP;
01600       fprintf(stderr, "MMM2D: some particles left the assumed slab, precision might be lost\n");
01601     }
01602     if (end < 0) {
01603       char *errtxt = runtime_error(100);
01604       ERROR_SPRINTF(errtxt, "{MMM2D: distance was negative, coordinates probably out of range } ");
01605       end = 0;
01606     }
01607     end = complexCutoff[end];
01608 
01609     for (n = 0; n < end; n++) {
01610       F[1] -= bon.e[n]*ztn_i;
01611       F[2] += bon.e[n]*ztn_r;
01612 
01613       tmp_r = ztn_r*zet2_r - ztn_i*zet2_i;
01614       ztn_i = ztn_r*zet2_i + ztn_i*zet2_r;
01615       ztn_r = tmp_r;
01616     }
01617     // fprintf(stderr, "complex force %f %f %f %d\n", F[0], F[1], F[2], end);
01618   }
01619 
01620   /* psi sum */
01621   {
01622     int n;
01623     double uxx = ux*d[0];
01624     double uxrho2 = ux2*rho2;
01625     double uxrho_2n, uxrho_2nm2; /* rho^{2n-2} */
01626     double mpe, mpo;
01627 
01628     /* n = 0 inflicts only Fx and pot */
01629     /* one ux is multiplied in to bessel, complex and psi at once, not here */
01630     F[0] += ux*mod_psi_odd(0, uxx);
01631 
01632     uxrho_2nm2 = 1.0;
01633     for (n = 1;n < n_modPsi; n++) {
01634       mpe    = mod_psi_even(n, uxx);
01635       mpo    = mod_psi_odd(n, uxx);
01636       uxrho_2n = uxrho_2nm2*uxrho2;
01637 
01638       F[0] +=     ux *uxrho_2n  *mpo;
01639       F[1] += 2*n*ux2*uxrho_2nm2*mpe*d[1];
01640       F[2] += 2*n*ux2*uxrho_2nm2*mpe*d[2];
01641 
01642       /* y < rho => ux2*uxrho_2nm2*d[1] < ux*uxrho_2n */
01643       if (fabs(2*n*ux*uxrho_2n*mpe) < part_error)
01644         break;
01645 
01646       uxrho_2nm2 = uxrho_2n;
01647     }
01648     // fprintf(stderr, "    psi force %f %f %f %d\n", F[0], F[1], F[2], n);
01649   }
01650 
01651 
01652   for (i = 0; i < 3; i++)
01653     F[i] *= ux;
01654 
01655   /* explicitly added potentials r_{-1,0} and r_{1,0} */
01656   {
01657     double cx    = d[0] + box_l[0];
01658     double rinv2 = 1.0/(cx*cx + rho2), rinv = sqrt(rinv2);
01659     double rinv3 = rinv*rinv2;
01660     F[0] +=   cx*rinv3;
01661     F[1] += d[1]*rinv3;
01662     F[2] += d[2]*rinv3;
01663 
01664     cx   = d[0] - box_l[0];
01665     rinv2 = 1.0/(cx*cx + rho2); rinv = sqrt(rinv2);
01666     rinv3 = rinv*rinv2;
01667     F[0] +=   cx*rinv3;
01668     F[1] += d[1]*rinv3;
01669     F[2] += d[2]*rinv3;
01670 
01671     rinv3 = 1/(dl2*dl);
01672     F[0] += d[0]*rinv3;
01673     F[1] += d[1]*rinv3;
01674     F[2] += d[2]*rinv3;
01675 
01676     // fprintf(stderr, "explcit force %f %f %f\n", F[0], F[1], F[2]);
01677   }
01678 
01679   for (i = 0; i < 3; i++)
01680     force[i] += pref*F[i];
01681 }
01682 
01683 MDINLINE double calc_mmm2d_copy_pair_energy(double d[3])
01684 {
01685   double eng;
01686   double z2     = d[2]*d[2];
01687   double rho2   = d[1]*d[1] + z2;
01688 
01689   /* the ux is multiplied in below */
01690   eng = -2*log(4*M_PI*uy*box_l[0]);
01691 
01692   /* Bessel sum */
01693   {
01694     int p, l;
01695     double k0Sum;
01696     double freq;
01697     double rho_l, ypl;
01698     double c;
01699 
01700     for (p = 1; p < besselCutoff.n; p++) {
01701       k0Sum  = 0;
01702 
01703       freq = C_2PI*ux*p;
01704 
01705       for (l = 1; l < besselCutoff.e[p-1]; l++) {
01706         ypl   = d[1] + l*box_l[1];
01707         rho_l = sqrt(ypl*ypl + z2);
01708         k0Sum  += K0(freq*rho_l);
01709 
01710         ypl   = d[1] - l*box_l[1];
01711         rho_l = sqrt(ypl*ypl + z2);
01712         k0Sum  += K0(freq*rho_l);
01713       }
01714 
01715       /* the ux is multiplied in to bessel, complex and psi at once, not here */
01716       c = 4*cos(freq*d[0]);
01717       eng += c*k0Sum;
01718     }
01719     // fprintf(stderr, " bessel energy %f\n", eng);
01720   }
01721 
01722   /* complex sum */
01723   {
01724     double zeta_r, zeta_i;
01725     double zet2_r, zet2_i;
01726     double ztn_r,  ztn_i;
01727     double tmp_r;
01728     int end, n;
01729 
01730     ztn_r = zeta_r = uy*d[2];
01731     ztn_i = zeta_i = uy*d[1];
01732 
01733     zet2_r = zeta_r*zeta_r - zeta_i*zeta_i;
01734     zet2_i = 2*zeta_r*zeta_i;
01735 
01736     ztn_r = zet2_r;
01737     ztn_i = zet2_i;
01738 
01739     end = (int)ceil(COMPLEX_FAC*uy2*rho2);
01740     if (end > COMPLEX_STEP) {
01741         end = COMPLEX_STEP;
01742         fprintf(stderr, "MMM2D: some particles left the assumed slab, precision might be lost\n");
01743     }
01744     end = complexCutoff[end];
01745     for (n = 1; n <= end; n++) {
01746       eng -= box_l[1]/(2*n)*bon.e[n-1]*ztn_r;
01747  
01748       tmp_r = ztn_r*zet2_r - ztn_i*zet2_i;
01749       ztn_i = ztn_r*zet2_i + ztn_i*zet2_r;
01750       ztn_r = tmp_r;
01751     }
01752     // fprintf(stderr, "complex energy %f %d\n", eng, end);
01753   }
01754 
01755   /* psi sum */
01756   {
01757     int n;
01758     double add;
01759     double uxx = ux*d[0];
01760     double uxrho2 = ux2*rho2;
01761     double uxrho_2n;
01762 
01763     /* n = 0 inflicts only Fx and pot */
01764     /* one ux is multiplied in to bessel, complex and psi at once, not here */
01765     eng -= mod_psi_even(0, uxx);
01766 
01767     uxrho_2n = uxrho2;
01768     for (n = 1; n < n_modPsi; n++) {
01769       add = uxrho_2n*mod_psi_even(n, uxx);
01770       eng -= add;
01771       if (fabs(add) < part_error)
01772         break;
01773       uxrho_2n *= uxrho2;
01774     }
01775     // fprintf(stderr, "    psi energy %f %d\n", eng, n);
01776   }
01777 
01778   eng *= ux;
01779 
01780   /* explicitly added potentials r_{-1,0} and r_{1,0} */
01781   {
01782     double cx   = d[0] + box_l[0];
01783     double rinv = sqrt(1.0/(cx*cx + rho2));
01784     eng += rinv;
01785 
01786     cx   = d[0] - box_l[0];
01787     rinv = sqrt(1.0/(cx*cx + rho2));
01788     eng += rinv;
01789 
01790     // fprintf(stderr, "explcit energy %f %f %f %f\n", d[0], d[1], d[2], eng);
01791   }
01792 
01793   return eng;
01794 }
01795 
01796 double mmm2d_coulomb_pair_energy(double charge_factor,
01797                                  double dv[3], double d2, double d)
01798 {
01799   double eng, pref = coulomb.prefactor*charge_factor;
01800   if (pref != 0.0) {
01801     eng = calc_mmm2d_copy_pair_energy(dv);
01802     return pref*(eng + 1/d);
01803   }
01804   return 0.0;
01805 }
01806 
01807 void MMM2D_self_energy()
01808 {
01809   int c, np, i;
01810   Particle *part;
01811   double dv[3] = {0, 0, 0};
01812   double seng = coulomb.prefactor*calc_mmm2d_copy_pair_energy(dv);
01813   self_energy = 0;
01814 
01815   /* this one gives twice the real self energy, as it is used
01816      in the far formula which counts everything twice and in
01817      the end divides by two*/
01818 
01819   // fprintf(stderr, "%d: self energy %g\n", this_node, seng);
01820 
01821   for (c = 0; c < local_cells.n; c++) {
01822     np   = local_cells.cell[c]->n;
01823     part = local_cells.cell[c]->part;
01824 
01825     for (i = 0; i < np; i++) {
01826       self_energy += seng*SQR(part[i].p.q);
01827      }
01828   }
01829 }
01830 
01831 /****************************************
01832  * COMMON PARTS
01833  ****************************************/
01834 
01835 int MMM2D_set_params(double maxPWerror, double far_cut, double delta_top, double delta_bot)
01836 {
01837   int err;
01838 
01839   if (cell_structure.type != CELL_STRUCTURE_NSQUARE &&
01840       cell_structure.type != CELL_STRUCTURE_LAYERED) {
01841     return ES_ERROR;
01842   }
01843 
01844   mmm2d_params.maxPWerror = maxPWerror;
01845     
01846   if (delta_top != 0.0 || delta_bot != 0.0) {
01847     mmm2d_params.dielectric_contrast_on = 1;
01848     mmm2d_params.delta_mid_top = delta_top;
01849     mmm2d_params.delta_mid_bot = delta_bot;
01850     mmm2d_params.delta_mult = delta_top*delta_bot;
01851   }
01852   else {
01853     mmm2d_params.dielectric_contrast_on = 0;
01854     mmm2d_params.delta_mid_top = 0;
01855     mmm2d_params.delta_mid_bot = 0;
01856     mmm2d_params.delta_mult = 0;
01857   }
01858 
01859   MMM2D_setup_constants();
01860 
01861   if ((err = MMM2D_tune_near(maxPWerror)))
01862     return err;
01863 
01864   /* if we cannot do the far formula, force off */
01865   if (cell_structure.type == CELL_STRUCTURE_NSQUARE ||
01866       (cell_structure.type == CELL_STRUCTURE_LAYERED && n_nodes*n_layers < 3)) {
01867     mmm2d_params.far_cut = 0.0;
01868     if (mmm2d_params.dielectric_contrast_on) {
01869       return ERROR_ICL;
01870     }
01871   }
01872   else {
01873     mmm2d_params.far_cut = far_cut;
01874     mmm2d_params.far_cut2 = SQR(far_cut);
01875     if (mmm2d_params.far_cut > 0)
01876       mmm2d_params.far_calculated = 0;
01877     else {
01878       if ((err = MMM2D_tune_far(maxPWerror)))
01879         return err;
01880       mmm2d_params.far_calculated = 1;    
01881     }
01882   }
01883 
01884   coulomb.method = COULOMB_MMM2D;
01885 
01886   mpi_bcast_coulomb_params();
01887 
01888   return ES_OK;
01889 }
01890 
01891 int MMM2D_sanity_checks()
01892 {
01893   char *errtxt;
01894 
01895   if (!PERIODIC(0) || !PERIODIC(1) || PERIODIC(2)) {
01896     errtxt = runtime_error(128);
01897     ERROR_SPRINTF(errtxt, "{025 MMM2D requires periodicity 1 1 0} ");
01898     return 1;
01899   }
01900   
01901   if (cell_structure.type != CELL_STRUCTURE_LAYERED &&
01902       cell_structure.type != CELL_STRUCTURE_NSQUARE) {
01903     errtxt = runtime_error(128);
01904     ERROR_SPRINTF(errtxt, "{026 MMM2D at present requires layered (or n-square) cellsystem} ");
01905     return 1;
01906   }
01907   return 0;
01908 }
01909 
01910 void MMM2D_init()
01911 {
01912   int err;
01913   char *errtxt;
01914 
01915   if (MMM2D_sanity_checks()) return;
01916 
01917   MMM2D_setup_constants();
01918   if ((err = MMM2D_tune_near(mmm2d_params.maxPWerror))) {
01919     errtxt = runtime_error(128);
01920     ERROR_SPRINTF(errtxt, "{027 MMM2D auto-retuning: %s} ", mmm2d_errors[err]);
01921     coulomb.method = COULOMB_NONE;
01922     return;
01923   }
01924   if (cell_structure.type == CELL_STRUCTURE_NSQUARE ||
01925       (cell_structure.type == CELL_STRUCTURE_LAYERED && n_nodes*n_layers < 3)) {
01926     mmm2d_params.far_cut = 0.0;
01927     if (mmm2d_params.dielectric_contrast_on) {
01928       errtxt = runtime_error(128);
01929       ERROR_SPRINTF(errtxt, "{027 MMM2D auto-retuning: IC requires layered cellsystem with > 3 layers} ");
01930     }
01931   }
01932   else {
01933     if (mmm2d_params.far_calculated) {
01934       if ((err = MMM2D_tune_far(mmm2d_params.maxPWerror))) {
01935         errtxt = runtime_error(128);
01936         ERROR_SPRINTF(errtxt, "{028 MMM2D auto-retuning: %s} ", mmm2d_errors[err]);
01937         coulomb.method = COULOMB_NONE;
01938         return;
01939       }
01940     }
01941   }
01942 }
01943 
01944 void MMM2D_on_resort_particles()
01945 {
01946   /* if we need MMM2D far formula, allocate caches */
01947   if (cell_structure.type == CELL_STRUCTURE_LAYERED) {
01948     n_localpart = cells_get_n_particles();
01949     n_scxcache = (int)(ceil(mmm2d_params.far_cut/ux) + 1);
01950     n_scycache = (int)(ceil(mmm2d_params.far_cut/uy) + 1);
01951     scxcache = realloc(scxcache, n_scxcache*n_localpart*sizeof(SCCache));
01952     scycache = realloc(scycache, n_scycache*n_localpart*sizeof(SCCache));
01953     
01954     partblk   = realloc(partblk,  n_localpart*8*sizeof(double));
01955     lclcblk   = realloc(lclcblk,  n_cells*8*sizeof(double));
01956     gblcblk   = realloc(gblcblk,  n_layers*8*sizeof(double));
01957   }
01958   MMM2D_self_energy();
01959 }
01960 
01961 
01962 void  MMM2D_dielectric_layers_force_contribution()
01963 {
01964   int c, i, j;
01965   Cell  *celll;
01966   int      npl;
01967   Particle *pl, *p1;
01968   double dist2, d[3];
01969   double charge_factor;
01970   double a[3];
01971   double force[3]={0, 0, 0};
01972 
01973   if (!mmm2d_params.dielectric_contrast_on) return;
01974 
01975   if(this_node==0) {
01976     c=1;
01977     celll = &cells[c];
01978     pl    = celll->part;
01979     npl   = celll->n;
01980         
01981     for(i = 0; i < npl; i++) {
01982       // printf("enter mmm2d_dielectric %f \n",my_left[2]);
01983       force[0]=0;force[1]=0;force[2]=0;
01984       p1 = &pl[i];
01985       for(j = 0; j < npl; j++) {
01986         a[0]=pl[j].r.p[0]; a[1]=pl[j].r.p[1];a[2]=-pl[j].r.p[2];
01987         layered_get_mi_vector(d, p1->r.p, a);
01988         dist2 = sqrlen(d);
01989         charge_factor=p1->p.q*pl[j].p.q*mmm2d_params.delta_mid_bot;
01990         add_mmm2d_coulomb_pair_force(charge_factor, d, sqrt(dist2), dist2, force);
01991       }
01992       for (j = 0; j < 3; j++) { 
01993         p1->f.f[j] += force[j];
01994       }
01995     }
01996   }
01997   
01998   if(this_node==n_nodes-1) {
01999   
02000     c=n_layers;
02001     celll = &cells[c];
02002     pl    = celll->part;
02003     npl   = celll->n;
02004     for(i = 0; i < npl; i++) {
02005       force[0]=0;force[1]=0;force[2]=0;
02006       p1 = &pl[i];
02007       for(j = 0; j < npl; j++) {
02008         a[0]=pl[j].r.p[0];  a[1]=pl[j].r.p[1]; a[2]=2*box_l[2]-pl[j].r.p[2];
02009         layered_get_mi_vector(d, p1->r.p, a);
02010         dist2 = sqrlen(d);
02011         charge_factor=p1->p.q*pl[j].p.q*mmm2d_params.delta_mid_top; 
02012         add_mmm2d_coulomb_pair_force(charge_factor, d, sqrt(dist2), dist2, force);
02013       }
02014       for (j = 0; j < 3; j++) { 
02015         p1->f.f[j] += force[j];
02016       }
02017     }
02018   }
02019 
02020 }
02021 
02022 double  MMM2D_dielectric_layers_energy_contribution()
02023 {
02024   int c, i, j;
02025   Cell  *celll;
02026   int      npl;
02027   Particle *pl, *p1;
02028   double dist2, d[3];
02029   double charge_factor;
02030   double a[3];
02031   double eng=0.0;
02032   // prefactor for the charged plate interaction removal
02033   double corr_pref = coulomb.prefactor*C_2PI*ux*uy;
02034 
02035   if (!mmm2d_params.dielectric_contrast_on) return 0.0;
02036 
02037   if(this_node==0) {
02038     c=1;
02039     celll = &cells[c];
02040     pl    = celll->part;
02041     npl   = celll->n;
02042     
02043     for(i = 0; i < npl; i++) {
02044       p1 = &pl[i];
02045       for(j = 0; j < npl; j++) {
02046         a[0]=pl[j].r.p[0];  a[1]=pl[j].r.p[1];a[2]=-pl[j].r.p[2];
02047         layered_get_mi_vector(d, p1->r.p, a);
02048         dist2 = sqrlen(d);
02049         charge_factor = mmm2d_params.delta_mid_bot*p1->p.q*pl[j].p.q;
02050         eng+=mmm2d_coulomb_pair_energy(charge_factor, d, dist2, sqrt(dist2)) + corr_pref*charge_factor*d[2];
02051       }
02052     }
02053   }
02054   
02055   if(this_node==n_nodes-1) {
02056     c=n_layers;
02057     celll = &cells[c];
02058     pl    = celll->part;
02059     npl   = celll->n;
02060     for(i = 0; i < npl; i++) {
02061       p1 = &pl[i];
02062       for(j = 0; j < npl; j++) {
02063         a[0]=pl[j].r.p[0];  a[1]=pl[j].r.p[1]; a[2]=2*box_l[2]-pl[j].r.p[2];
02064         layered_get_mi_vector(d, p1->r.p, a);
02065         dist2 = sqrlen(d);
02066         charge_factor=mmm2d_params.delta_mid_top*p1->p.q*pl[j].p.q;
02067         eng+=mmm2d_coulomb_pair_energy(charge_factor, d, dist2, sqrt(dist2)) - corr_pref*charge_factor*d[2];
02068       }
02069     }
02070   }
02071   return 0.5*eng;
02072 }
02073 
02074 #endif