![]() |
ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
|
00001 /* 00002 Copyright (C) 2010,2012,2013 The ESPResSo project 00003 Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 00004 Max-Planck-Institute for Polymer Research, Theory Group 00005 00006 This file is part of ESPResSo. 00007 00008 ESPResSo is free software: you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation, either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 ESPResSo is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with this program. If not, see <http://www.gnu.org/licenses/>. 00020 */ 00021 /** \file mmm2d.c MMM2D algorithm for long range coulomb interaction. 00022 * 00023 * For more information about MMM2D, see \ref mmm2d.h "mmm2d.h". 00024 */ 00025 00026 #include <math.h> 00027 #include <mpi.h> 00028 #include "utils.h" 00029 #include "communication.h" 00030 #include "grid.h" 00031 #include "particle_data.h" 00032 #include "interaction_data.h" 00033 #include "cells.h" 00034 #include "mmm2d.h" 00035 #include "mmm-common.h" 00036 #include "specfunc.h" 00037 #include "integrate.h" 00038 #include "layered.h" 00039 00040 #ifdef ELECTROSTATICS 00041 char const *mmm2d_errors[] = { 00042 "ok", 00043 "Layer height too large for MMM2D near formula, increase n_layers", 00044 "box_l[1]/box_l[0] too large for MMM2D near formula, please exchange x and y", 00045 "Could find not reasonable Bessel cutoff. Please decrease n_layers or the error bound", 00046 "Could find not reasonable Polygamma cutoff. Consider exchanging x and y", 00047 "Far cutoff too large, decrease the error bound", 00048 "Layer height too small for MMM2D far formula, decrease n_layers or skin", 00049 "IC requires layered cellsystem with more than 3 layers", 00050 }; 00051 00052 /** if you define this, the Besselfunctions are calculated up 00053 to machine precision, otherwise 10^-14, which should be 00054 definitely enough for daily life. */ 00055 #undef BESSEL_MACHINE_PREC 00056 00057 // #define CHECKPOINTS 00058 #if 0 00059 #define LOG_FORCES(x) x 00060 #else 00061 #define LOG_FORCES(x) 00062 #endif 00063 00064 #ifndef BESSEL_MACHINE_PREC 00065 #define K0 LPK0 00066 #define K1 LPK1 00067 #endif 00068 00069 /**************************************** 00070 * LOCAL DEFINES 00071 ****************************************/ 00072 00073 /** Largest reasonable cutoff for far formula. A double cannot overflow 00074 with this value. */ 00075 #define MAXIMAL_FAR_CUT 100 00076 00077 /** Largest reasonable cutoff for Bessel function. The Bessel functions 00078 are quite slow, so do not make too large. */ 00079 #define MAXIMAL_B_CUT 50 00080 00081 /** Largest reasonable order of polygamma series. These are pretty fast, 00082 so use more of them. Also, the real cutoff is determined at run time, 00083 so normally we are faster */ 00084 #define MAXIMAL_POLYGAMMA 100 00085 00086 /** internal relative precision of far formula. This controls how many 00087 p,q vectors are done at once. This has nothing to do with the effective 00088 precision, but rather controls how different values can be we add up without 00089 loosing the smallest values. In principle one could choose smaller values, but 00090 that would not make things faster */ 00091 #define FARRELPREC 1e-6 00092 00093 /** number of steps in the complex cutoff table */ 00094 #define COMPLEX_STEP 16 00095 /** map numbers from 0 to 1/2 onto the complex cutoff table 00096 (with security margin) */ 00097 #define COMPLEX_FAC (COMPLEX_STEP/(.5 + 0.01)) 00098 00099 /**************************************** 00100 * LOCAL VARIABLES 00101 ****************************************/ 00102 00103 /** up to that error the sums in the NF are evaluated */ 00104 static double part_error; 00105 00106 /** cutoffs for the bessel sum */ 00107 static IntList besselCutoff = {NULL, 0, 0}; 00108 00109 /** cutoffs for the complex sum */ 00110 static int complexCutoff[COMPLEX_STEP + 1]; 00111 /** bernoulli numbers divided by n */ 00112 static DoubleList bon = {NULL, 0, 0}; 00113 00114 /** inverse box dimensions */ 00115 /*@{*/ 00116 static double ux, ux2, uy, uy2, uz; 00117 /*@}*/ 00118 00119 /** maximal z for near formula, minimal z for far formula. 00120 Is identical in the theory, but with the verlet tricks 00121 this is no longer true, the skin has to be added/subtracted */ 00122 /*@{*/ 00123 static double max_near, min_far; 00124 /*@}*/ 00125 00126 /// 00127 static double self_energy; 00128 00129 MMM2D_struct mmm2d_params = { 1e100, 10, 1, 0, 0, 1, 1, 1 }; 00130 00131 /** return codes for \ref MMM2D_tune_near and \ref MMM2D_tune_far */ 00132 /*@{*/ 00133 /** cell too large */ 00134 #define ERROR_LARGE 1 00135 /** box too large */ 00136 #define ERROR_BOXL 2 00137 /** no reasonable bessel cutoff found */ 00138 #define ERROR_BESSEL 3 00139 /** no reasonable polygamma cutoff found */ 00140 #define ERROR_POLY 4 00141 /** no reasonable cutoff for the far formula found */ 00142 #define ERROR_FARC 5 00143 /** cell too small */ 00144 #define ERROR_SMALL 6 00145 /** IC layer requirement */ 00146 #define ERROR_ICL 7 00147 /*@}*/ 00148 00149 /**************************************** 00150 * LOCAL ARRAYS 00151 ****************************************/ 00152 00153 /** \name Product decomposition data organization 00154 For the cell blocks 00155 it is assumed that the lower blocks part is in the lower half. 00156 This has to have positive sign, so that has to be first. */ 00157 /*@{*/ 00158 00159 #define POQESP 0 00160 #define POQECP 1 00161 #define POQESM 2 00162 #define POQECM 3 00163 00164 #define PQESSP 0 00165 #define PQESCP 1 00166 #define PQECSP 2 00167 #define PQECCP 3 00168 #define PQESSM 4 00169 #define PQESCM 5 00170 #define PQECSM 6 00171 #define PQECCM 7 00172 00173 #define QQEQQP 0 00174 #define QQEQQM 1 00175 00176 #define ABEQQP 0 00177 #define ABEQZP 1 00178 #define ABEQQM 2 00179 #define ABEQZM 3 00180 00181 /*@}*/ 00182 00183 /** number of local particles */ 00184 static int n_localpart = 0; 00185 00186 /** temporary buffers for product decomposition */ 00187 static double *partblk = NULL; 00188 /** for all local cells including ghosts */ 00189 static double *lclcblk = NULL; 00190 /** collected data from the cells above the top neighbor 00191 of a cell rsp. below the bottom neighbor 00192 (P=below, M=above, as the signs in the exp). */ 00193 static double *gblcblk = NULL; 00194 00195 /** contribution from the image charges */ 00196 static double lclimge[8]; 00197 00198 typedef struct { 00199 double s, c; 00200 } SCCache; 00201 00202 /** sin/cos caching */ 00203 static SCCache *scxcache = NULL; 00204 static int n_scxcache; 00205 /** sin/cos caching */ 00206 static SCCache *scycache = NULL; 00207 static int n_scycache; 00208 00209 00210 /** \name Local functions for the near formula */ 00211 /************************************************************/ 00212 /*@{*/ 00213 00214 /** complex evaluation */ 00215 static void prepareBernoulliNumbers(int nmax); 00216 00217 /** cutoff error setup. Returns error code */ 00218 static int MMM2D_tune_near(double error); 00219 00220 /** energy of all local particles with their copies */ 00221 void MMM2D_self_energy(); 00222 00223 /*@}*/ 00224 00225 /** \name Local functions for the far formula */ 00226 /************************************************************/ 00227 /*@{*/ 00228 00229 /** sin/cos storage */ 00230 static void prepare_scx_cache(); 00231 static void prepare_scy_cache(); 00232 /** clear the image contributions if there is no dielectric contrast and no image charges */ 00233 static void clear_image_contributions(int size); 00234 /** gather the informations for the far away image charges */ 00235 static void gather_image_contributions(int size); 00236 /** spread the top/bottom sums */ 00237 static void distribute(int size, double fac); 00238 /** 2 pi |z| code */ 00239 static void setup_z_force(); 00240 static void setup_z_energy(); 00241 static void add_z_force(); 00242 static double z_energy(); 00243 /** p=0 per frequency code */ 00244 static void setup_P(int p, double omega, double fac); 00245 static void add_P_force(); 00246 static double P_energy(double omega); 00247 /** q=0 per frequency code */ 00248 static void setup_Q(int q, double omega, double fac); 00249 static void add_Q_force(); 00250 static double Q_energy(double omega); 00251 /** p,q <> 0 per frequency code */ 00252 static void setup_PQ(int p, int q, double omega, double fac); 00253 static void add_PQ_force(int p, int q, double omega); 00254 static double PQ_energy(double omega); 00255 00256 /** cutoff error setup. Returns error code */ 00257 static int MMM2D_tune_far(double error); 00258 00259 /*@}*/ 00260 00261 /// 00262 void MMM2D_setup_constants() 00263 { 00264 ux = 1/box_l[0]; 00265 ux2 = ux*ux; 00266 uy = 1/box_l[1]; 00267 uy2 = uy*uy; 00268 uz = 1/box_l[2]; 00269 00270 switch (cell_structure.type) { 00271 case CELL_STRUCTURE_NSQUARE: 00272 max_near = box_l[2]; 00273 /* not used */ 00274 min_far = 0.0; 00275 break; 00276 case CELL_STRUCTURE_LAYERED: 00277 max_near = 2*layer_h + skin; 00278 min_far = layer_h - skin; 00279 break; 00280 default: 00281 fprintf(stderr, "%d: INTERNAL ERROR: MMM2D setup for cell structure it should reject\n", this_node); 00282 errexit(); 00283 } 00284 } 00285 00286 /**************************************** 00287 * FAR FORMULA 00288 ****************************************/ 00289 00290 static void prepare_scx_cache() 00291 { 00292 int np, c, i, ic, freq, o; 00293 double pref, arg; 00294 Particle *part; 00295 00296 for (freq = 1; freq <= n_scxcache; freq++) { 00297 pref = C_2PI*ux*freq; 00298 o = (freq-1)*n_localpart; 00299 ic = 0; 00300 for (c = 1; c <= n_layers; c++) { 00301 np = cells[c].n; 00302 part = cells[c].part; 00303 for (i = 0; i < np; i++) { 00304 arg = pref*part[i].r.p[0]; 00305 scxcache[o + ic].s = sin(arg); 00306 scxcache[o + ic].c = cos(arg); 00307 ic++; 00308 } 00309 } 00310 } 00311 } 00312 00313 static void prepare_scy_cache() 00314 { 00315 int np, c, i, ic, freq, o; 00316 double pref, arg; 00317 Particle *part; 00318 00319 for (freq = 1; freq <= n_scycache; freq++) { 00320 pref = C_2PI*uy*freq; 00321 o = (freq-1)*n_localpart; 00322 ic = 0; 00323 for (c = 1; c <= n_layers; c++) { 00324 np = cells[c].n; 00325 part = cells[c].part; 00326 for (i = 0; i < np; i++) { 00327 arg = pref*part[i].r.p[1]; 00328 scycache[o + ic].s = sin(arg); 00329 scycache[o + ic].c = cos(arg); 00330 ic++; 00331 } 00332 } 00333 } 00334 } 00335 00336 /*****************************************************************/ 00337 /* data distribution */ 00338 /*****************************************************************/ 00339 00340 /* vector operations */ 00341 00342 /** pdc = 0 */ 00343 MDINLINE void clear_vec(double *pdc, int size) 00344 { 00345 int i; 00346 for (i = 0; i < size; i++) 00347 pdc[i] = 0; 00348 } 00349 00350 /** pdc_d = pdc_s */ 00351 MDINLINE void copy_vec(double *pdc_d, double *pdc_s, int size) 00352 { 00353 int i; 00354 for (i = 0; i < size; i++) 00355 pdc_d[i] = pdc_s[i]; 00356 } 00357 00358 /** pdc_d = pdc_s1 + pdc_s2 */ 00359 MDINLINE void add_vec(double *pdc_d, double *pdc_s1, double *pdc_s2, int size) 00360 { 00361 int i; 00362 for (i = 0; i < size; i++) 00363 pdc_d[i] = pdc_s1[i] + pdc_s2[i]; 00364 } 00365 00366 /** pdc_d = scale*pdc_s1 + pdc_s2 */ 00367 MDINLINE void addscale_vec(double *pdc_d, double scale, double *pdc_s1, double *pdc_s2, int size) 00368 { 00369 int i; 00370 for (i = 0; i < size; i++) 00371 pdc_d[i] = scale*pdc_s1[i] + pdc_s2[i]; 00372 } 00373 00374 /** pdc_d = scale*pdc */ 00375 MDINLINE void scale_vec(double scale, double *pdc, int size) 00376 { 00377 int i; 00378 for (i = 0; i < size; i++) 00379 pdc[i] *= scale; 00380 } 00381 00382 /* block indexing - has to fit to the PQ block definitions above. 00383 size gives the full size of the data block, 00384 e_size is the size of only the top or bottom half, i.e. half of size. 00385 */ 00386 00387 MDINLINE double *block(double *p, int index, int size) 00388 { 00389 return &p[index*size]; 00390 } 00391 00392 MDINLINE double *blwentry(double *p, int index, int e_size) 00393 { 00394 return &p[2*index*e_size]; 00395 } 00396 00397 MDINLINE double *abventry(double *p, int index, int e_size) 00398 { 00399 return &p[(2*index + 1)*e_size]; 00400 } 00401 00402 /* dealing with the image contributions from far outside the simulation box */ 00403 00404 void clear_image_contributions(int e_size) 00405 { 00406 if (this_node == 0) 00407 /* the gblcblk contains all contributions from layers deeper than one layer below our system, 00408 which is precisely what the gblcblk should contain for the lowest layer. */ 00409 clear_vec(blwentry(gblcblk, 0, e_size), e_size); 00410 00411 if (this_node == n_nodes - 1) 00412 /* same for the top node */ 00413 clear_vec(abventry(gblcblk, n_layers - 1, e_size), e_size); 00414 } 00415 00416 void gather_image_contributions(int e_size) 00417 { 00418 double recvbuf[8]; 00419 00420 /* collect the image charge contributions with at least a layer distance */ 00421 MPI_Allreduce(lclimge, recvbuf, 2*e_size, MPI_DOUBLE, MPI_SUM, comm_cart); 00422 00423 if (this_node == 0) 00424 /* the gblcblk contains all contributions from layers deeper than one layer below our system, 00425 which is precisely what the gblcblk should contain for the lowest layer. */ 00426 copy_vec(blwentry(gblcblk, 0, e_size),recvbuf, e_size); 00427 00428 if (this_node == n_nodes - 1) 00429 /* same for the top node */ 00430 copy_vec(abventry(gblcblk, n_layers - 1, e_size), recvbuf + e_size, e_size); 00431 } 00432 00433 /* the data transfer routine for the lclcblks itself */ 00434 void distribute(int e_size, double fac) 00435 { 00436 int c, node, inv_node; 00437 double sendbuf[8]; 00438 double recvbuf[8]; 00439 MPI_Status status; 00440 00441 /* send/recv to/from other nodes. Also builds up the gblcblk. */ 00442 for (node = 0; node < n_nodes; node++) { 00443 inv_node = n_nodes - node - 1; 00444 /* up */ 00445 if (node == this_node) { 00446 /* calculate sums of cells below */ 00447 for (c = 1; c < n_layers; c++) 00448 addscale_vec(blwentry(gblcblk, c, e_size), fac, blwentry(gblcblk, c - 1, e_size), blwentry(lclcblk, c - 1, e_size), e_size); 00449 00450 /* calculate my ghost contribution only if a node above exists */ 00451 if (node + 1 < n_nodes) { 00452 addscale_vec(sendbuf, fac, blwentry(gblcblk, n_layers - 1, e_size), blwentry(lclcblk, n_layers - 1, e_size), e_size); 00453 copy_vec(sendbuf + e_size, blwentry(lclcblk, n_layers, e_size), e_size); 00454 MPI_Send(sendbuf, 2*e_size, MPI_DOUBLE, node + 1, 0, comm_cart); 00455 } 00456 } 00457 else if (node + 1 == this_node) { 00458 MPI_Recv(recvbuf, 2*e_size, MPI_DOUBLE, node, 0, comm_cart, &status); 00459 copy_vec(blwentry(gblcblk, 0, e_size), recvbuf, e_size); 00460 copy_vec(blwentry(lclcblk, 0, e_size), recvbuf + e_size, e_size); 00461 } 00462 00463 /* down */ 00464 if (inv_node == this_node) { 00465 /* calculate sums of all cells above */ 00466 for (c = n_layers + 1; c > 2; c--) 00467 addscale_vec(abventry(gblcblk, c - 3, e_size), fac, abventry(gblcblk, c - 2, e_size), abventry(lclcblk, c, e_size), e_size); 00468 00469 /* calculate my ghost contribution only if a node below exists */ 00470 if (inv_node - 1 >= 0) { 00471 addscale_vec(sendbuf, fac, abventry(gblcblk, 0, e_size), abventry(lclcblk, 2, e_size), e_size); 00472 copy_vec(sendbuf + e_size, abventry(lclcblk, 1, e_size), e_size); 00473 MPI_Send(sendbuf, 2*e_size, MPI_DOUBLE, inv_node - 1, 0, comm_cart); 00474 } 00475 } 00476 else if (inv_node - 1 == this_node) { 00477 MPI_Recv(recvbuf, 2*e_size, MPI_DOUBLE, inv_node, 0, comm_cart, &status); 00478 copy_vec(abventry(gblcblk, n_layers - 1, e_size), recvbuf, e_size); 00479 copy_vec(abventry(lclcblk, n_layers + 1, e_size), recvbuf + e_size, e_size); 00480 } 00481 } 00482 } 00483 00484 #ifdef CHECKPOINTS 00485 static void checkpoint(char *text, int p, int q, int e_size) 00486 { 00487 int c, i; 00488 fprintf(stderr, "%d: %s %d %d\n", this_node, text, p, q); 00489 00490 fprintf(stderr, "partblk\n"); 00491 for (c = 0; c < n_localpart; c++) { 00492 fprintf(stderr, "%d", c); 00493 for (i = 0; i < e_size; i++) 00494 fprintf(stderr, " %10.3g", block(partblk, c, 2*e_size)[i]); 00495 fprintf(stderr, " m"); 00496 for (i = 0; i < e_size; i++) 00497 fprintf(stderr, " %10.3g", block(partblk, c, 2*e_size)[i + e_size]); 00498 fprintf(stderr, "\n"); 00499 } 00500 fprintf(stderr, "\n"); 00501 00502 fprintf(stderr, "lclcblk\n"); 00503 fprintf(stderr, "0"); 00504 for (i = 0; i < e_size; i++) 00505 fprintf(stderr, " %10.3g", block(lclcblk, 0, 2*e_size)[i]); 00506 fprintf(stderr, "\n"); 00507 for (c = 1; c <= n_layers; c++) { 00508 fprintf(stderr, "%d", c); 00509 for (i = 0; i < e_size; i++) 00510 fprintf(stderr, " %10.3g", block(lclcblk, c, 2*e_size)[i]); 00511 fprintf(stderr, " m"); 00512 for (i = 0; i < e_size; i++) 00513 fprintf(stderr, " %10.3g", block(lclcblk, c, 2*e_size)[i + e_size]); 00514 fprintf(stderr, "\n"); 00515 } 00516 fprintf(stderr, "%d", n_layers + 1); 00517 for (i = 0; i < e_size; i++) 00518 fprintf(stderr, " "); 00519 fprintf(stderr, " m"); 00520 00521 for (i = 0; i < e_size; i++) 00522 fprintf(stderr, " %10.3g", block(lclcblk, n_layers + 1, 2*e_size)[i + e_size]); 00523 fprintf(stderr, "\n"); 00524 00525 fprintf(stderr, "gblcblk\n"); 00526 for (c = 0; c < n_layers; c++) { 00527 fprintf(stderr, "%d", c + 1); 00528 for (i = 0; i < e_size; i++) 00529 fprintf(stderr, " %10.3g", block(gblcblk, c, 2*e_size)[i]); 00530 fprintf(stderr, " m"); 00531 for (i = 0; i < e_size; i++) 00532 fprintf(stderr, " %10.3g", block(gblcblk, c, 2*e_size)[i + e_size]); 00533 fprintf(stderr, "\n"); 00534 } 00535 fprintf(stderr, "\n"); 00536 } 00537 #else 00538 #define checkpoint(text,p,q,size) 00539 #endif 00540 00541 /*****************************************************************/ 00542 /* 2 pi (sign)(z) */ 00543 /*****************************************************************/ 00544 00545 static void setup_z_force() 00546 { 00547 int np, c, i; 00548 double pref = coulomb.prefactor*C_2PI*ux*uy; 00549 Particle *part; 00550 double *lclimgebot=NULL,*lclimgetop=NULL; 00551 int e_size=1,size = 2; 00552 double e, e_di_l, e_di_h; 00553 00554 double fac_imgsum; 00555 00556 /* in case of metallic boundary conditions on both sides, we get an infinite array, 00557 which only exists for charge neutral systems. But in this case, we can as well 00558 not sum up the force array, as the net force per image is 0 */ 00559 if (mmm2d_params.delta_mult != 1.0) { 00560 fac_imgsum = 1/(1 - mmm2d_params.delta_mult); 00561 } 00562 else { 00563 fac_imgsum = 0; 00564 } 00565 00566 if (mmm2d_params.dielectric_contrast_on) 00567 clear_vec(lclimge, size); 00568 00569 if(this_node==0) { 00570 00571 lclimgebot=blwentry(lclcblk,0,e_size); 00572 clear_vec(lclimgebot, e_size); 00573 } 00574 00575 if(this_node==n_nodes-1) { 00576 00577 lclimgetop=abventry(lclcblk,n_layers+1,e_size); 00578 clear_vec(lclimgetop, e_size); 00579 } 00580 00581 /* calculate local cellblks. partblks don't make sense */ 00582 for (c = 1; c <= n_layers; c++) { 00583 np = cells[c].n; 00584 part = cells[c].part; 00585 lclcblk[size*c] = 0; 00586 for (i = 0; i < np; i++) { 00587 lclcblk[size*c] += part[i].p.q; 00588 00589 if (mmm2d_params.dielectric_contrast_on) { 00590 e_di_l = (mmm2d_params.delta_mult*mmm2d_params.delta_mid_bot 00591 + mmm2d_params.delta_mult)*fac_imgsum; 00592 if (c==1 && this_node==0) { 00593 e = mmm2d_params.delta_mid_bot; 00594 lclimgebot[QQEQQP] += part[i].p.q*e; 00595 } 00596 else 00597 e_di_l += mmm2d_params.delta_mid_bot; 00598 00599 e_di_h = (mmm2d_params.delta_mult*mmm2d_params.delta_mid_top 00600 + mmm2d_params.delta_mult)*fac_imgsum; 00601 00602 if (c==n_layers && this_node==n_nodes-1) { 00603 e = mmm2d_params.delta_mid_top; 00604 lclimgetop[QQEQQP] += part[i].p.q*e; 00605 } 00606 else 00607 e_di_h += mmm2d_params.delta_mid_top; 00608 00609 lclimge[QQEQQP] += part[i].p.q*e_di_l; 00610 lclimge[QQEQQM] += part[i].p.q*e_di_h; 00611 } 00612 } 00613 lclcblk[size*c] *= pref; 00614 lclcblk[size*c+1] = lclcblk[size*c]; 00615 } 00616 00617 if (mmm2d_params.dielectric_contrast_on) { 00618 scale_vec(pref, lclimge, size); 00619 if(this_node==0) 00620 scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size); 00621 if(this_node==n_nodes-1) 00622 scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size); 00623 } 00624 } 00625 00626 static void add_z_force() 00627 { 00628 int np, c, i; 00629 double add; 00630 Particle *part; 00631 double *othcblk; 00632 int size = 2; 00633 00634 for (c = 1; c <= n_layers; c++) { 00635 othcblk = block(gblcblk, c - 1, size); 00636 add = othcblk[QQEQQP] - othcblk[QQEQQM]; 00637 np = cells[c].n; 00638 part = cells[c].part; 00639 for (i = 0; i < np; i++) { 00640 part[i].f.f[2] += part[i].p.q*add; 00641 LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n", 00642 this_node, part[i].p.identity, part[i].f.f[0], 00643 part[i].f.f[1], part[i].f.f[2])); 00644 } 00645 } 00646 } 00647 00648 static void setup_z_energy() 00649 { 00650 int np, c, i; 00651 double pref = -coulomb.prefactor*C_2PI*ux*uy; 00652 Particle *part; 00653 int e_size = 2, size = 4; 00654 00655 if (this_node == 0) 00656 /* the lowest lclcblk does not contain anything, since there are no charges below the simulation 00657 box, at least for this term. */ 00658 clear_vec(blwentry(lclcblk, 0, e_size), e_size); 00659 00660 if (this_node == n_nodes - 1) 00661 /* same for the top node */ 00662 clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size); 00663 00664 /* calculate local cellblks. partblks don't make sense */ 00665 for (c = 1; c <= n_layers; c++) { 00666 np = cells[c].n; 00667 part = cells[c].part; 00668 clear_vec(blwentry(lclcblk, c, e_size), e_size); 00669 for (i = 0; i < np; i++) { 00670 lclcblk[size*c + ABEQQP] += part[i].p.q; 00671 lclcblk[size*c + ABEQZP] += part[i].p.q*part[i].r.p[2]; 00672 } 00673 scale_vec(pref, blwentry(lclcblk, c, e_size), e_size); 00674 /* just to be able to use the standard distribution. Here below 00675 and above terms are the same */ 00676 copy_vec(abventry(lclcblk, c, e_size), blwentry(lclcblk, c, e_size), e_size); 00677 } 00678 } 00679 00680 static double z_energy() 00681 { 00682 int np, c, i; 00683 Particle *part; 00684 double *othcblk; 00685 int size = 4; 00686 double eng = 0; 00687 for (c = 1; c <= n_layers; c++) { 00688 othcblk = block(gblcblk, c - 1, size); 00689 np = cells[c].n; 00690 part = cells[c].part; 00691 for (i = 0; i < np; i++) { 00692 eng += part[i].p.q*(part[i].r.p[2]*othcblk[ABEQQP] - othcblk[ABEQZP] - 00693 part[i].r.p[2]*othcblk[ABEQQM] + othcblk[ABEQZM]); 00694 00695 } 00696 } 00697 return eng; 00698 } 00699 00700 /*****************************************************************/ 00701 /* PoQ exp sum */ 00702 /*****************************************************************/ 00703 static void setup_P(int p, double omega, double fac) 00704 { 00705 int np, c, i, ic, o = (p-1)*n_localpart; 00706 Particle *part; 00707 double pref = coulomb.prefactor*4*M_PI*ux*uy*fac*fac; 00708 double h = box_l[2]; 00709 double fac_imgsum = 1/(1 - mmm2d_params.delta_mult*exp(-omega*2*h)); 00710 double fac_delta_mid_bot = mmm2d_params.delta_mid_bot*fac_imgsum; 00711 double fac_delta_mid_top = mmm2d_params.delta_mid_top*fac_imgsum; 00712 double fac_delta = mmm2d_params.delta_mult*fac_imgsum; 00713 double layer_top; 00714 double e, e_di_l, e_di_h; 00715 double *llclcblk; 00716 double *lclimgebot = NULL, *lclimgetop = NULL; 00717 int e_size = 2, size = 4; 00718 00719 if (mmm2d_params.dielectric_contrast_on) 00720 clear_vec(lclimge, size); 00721 00722 if(this_node==0) { 00723 /* on the lowest node, clear the lclcblk below, which only contains the images of the lowest layer 00724 if there is dielectric contrast, otherwise it is empty */ 00725 lclimgebot = block(lclcblk, 0, size); 00726 clear_vec(blwentry(lclcblk, 0, e_size), e_size); 00727 } 00728 if(this_node==n_nodes-1) { 00729 /* same for the top node */ 00730 lclimgetop = block(lclcblk, n_layers + 1, size); 00731 clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size); 00732 } 00733 00734 layer_top = my_left[2] + layer_h; 00735 ic = 0; 00736 for (c = 1; c <= n_layers; c++) { 00737 np = cells[c].n; 00738 part = cells[c].part; 00739 llclcblk = block(lclcblk, c, size); 00740 00741 clear_vec(llclcblk, size); 00742 00743 for (i = 0; i < np; i++) { 00744 e = exp(omega*(part[i].r.p[2] - layer_top)); 00745 00746 partblk[size*ic + POQESM] = part[i].p.q*scxcache[o + ic].s/e; 00747 partblk[size*ic + POQESP] = part[i].p.q*scxcache[o + ic].s*e; 00748 partblk[size*ic + POQECM] = part[i].p.q*scxcache[o + ic].c/e; 00749 partblk[size*ic + POQECP] = part[i].p.q*scxcache[o + ic].c*e; 00750 00751 /* take images due to different dielectric constants into account */ 00752 if (mmm2d_params.dielectric_contrast_on) { 00753 if (c==1 && this_node==0) { 00754 /* There are image charges at -(2h+z) and -(2h-z) etc. layer_h included due to the shift 00755 in z */ 00756 e_di_l = ( exp(omega*(-part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_bot + 00757 exp(omega*( part[i].r.p[2] - 2*h + layer_h)) )*fac_delta; 00758 00759 e = exp(omega*(-part[i].r.p[2]))*mmm2d_params.delta_mid_bot; 00760 00761 lclimgebot[POQESP] += part[i].p.q*scxcache[o + ic].s*e; 00762 lclimgebot[POQECP] += part[i].p.q*scxcache[o + ic].c*e; 00763 } 00764 else 00765 /* There are image charges at -(z) and -(2h-z) etc. layer_h included due to the shift in z */ 00766 e_di_l = ( exp(omega*(-part[i].r.p[2] + layer_h)) + 00767 exp(omega*( part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_top )*fac_delta_mid_bot; 00768 00769 if (c==n_layers && this_node==n_nodes-1) { 00770 /* There are image charges at (3h-z) and (h+z) from the top layer etc. layer_h included 00771 due to the shift in z */ 00772 e_di_h = ( exp(omega*( part[i].r.p[2] - 3*h + 2*layer_h))*mmm2d_params.delta_mid_top + 00773 exp(omega*(-part[i].r.p[2] - h + 2*layer_h)) )*fac_delta; 00774 00775 /* There are image charges at (h-z) layer_h included due to the shift in z */ 00776 e = exp(omega*(part[i].r.p[2] - h + layer_h))*mmm2d_params.delta_mid_top; 00777 00778 lclimgetop[POQESM]+= part[i].p.q*scxcache[o + ic].s*e; 00779 lclimgetop[POQECM]+= part[i].p.q*scxcache[o + ic].c*e; 00780 } 00781 else 00782 /* There are image charges at (h-z) and (h+z) from the top layer etc. layer_h included due 00783 to the shift in z */ 00784 e_di_h = ( exp(omega*( part[i].r.p[2] - h + 2*layer_h)) + 00785 exp(omega*(-part[i].r.p[2] - h + 2*layer_h))*mmm2d_params.delta_mid_bot )*fac_delta_mid_top; 00786 00787 lclimge[POQESP] += part[i].p.q*scxcache[o + ic].s*e_di_l; 00788 lclimge[POQECP] += part[i].p.q*scxcache[o + ic].c*e_di_l; 00789 lclimge[POQESM] += part[i].p.q*scxcache[o + ic].s*e_di_h; 00790 lclimge[POQECM] += part[i].p.q*scxcache[o + ic].c*e_di_h; 00791 } 00792 00793 add_vec(llclcblk, llclcblk, block(partblk, ic, size), size); 00794 ic++; 00795 } 00796 scale_vec(pref, blwentry(lclcblk, c, e_size), e_size); 00797 scale_vec(pref, abventry(lclcblk, c, e_size), e_size); 00798 00799 layer_top += layer_h; 00800 } 00801 00802 if (mmm2d_params.dielectric_contrast_on) { 00803 scale_vec(pref, lclimge, size); 00804 if(this_node==0) 00805 scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size); 00806 if(this_node==n_nodes-1) 00807 scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size); 00808 } 00809 } 00810 00811 /* compare setup_P */ 00812 static void setup_Q(int q, double omega, double fac) 00813 { 00814 int np, c, i, ic, o = (q-1)*n_localpart; 00815 Particle *part; 00816 double pref = coulomb.prefactor*4*M_PI*ux*uy*fac*fac; 00817 double h = box_l[2]; 00818 double fac_imgsum = 1/(1 - mmm2d_params.delta_mult*exp(-omega*2*h)); 00819 double fac_delta_mid_bot = mmm2d_params.delta_mid_bot*fac_imgsum; 00820 double fac_delta_mid_top = mmm2d_params.delta_mid_top*fac_imgsum; 00821 double fac_delta = mmm2d_params.delta_mult*fac_imgsum; 00822 double layer_top; 00823 double e, e_di_l, e_di_h; 00824 double *llclcblk; 00825 double *lclimgebot=NULL, *lclimgetop=NULL; 00826 int e_size = 2, size = 4; 00827 00828 if (mmm2d_params.dielectric_contrast_on) 00829 clear_vec(lclimge, size); 00830 00831 if(this_node==0) { 00832 lclimgebot = block(lclcblk, 0, size); 00833 clear_vec(blwentry(lclcblk, 0, e_size), e_size); 00834 } 00835 00836 if(this_node==n_nodes-1) { 00837 lclimgetop = block(lclcblk, n_layers + 1, size); 00838 clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size); 00839 } 00840 00841 layer_top = my_left[2] + layer_h; 00842 ic = 0; 00843 for (c = 1; c <= n_layers; c++) { 00844 np = cells[c].n; 00845 part = cells[c].part; 00846 llclcblk = block(lclcblk, c, size); 00847 00848 clear_vec(llclcblk, size); 00849 00850 for (i = 0; i < np; i++) { 00851 e = exp(omega*(part[i].r.p[2] - layer_top)); 00852 00853 partblk[size*ic + POQESM] = part[i].p.q*scycache[o + ic].s/e; 00854 partblk[size*ic + POQESP] = part[i].p.q*scycache[o + ic].s*e; 00855 partblk[size*ic + POQECM] = part[i].p.q*scycache[o + ic].c/e; 00856 partblk[size*ic + POQECP] = part[i].p.q*scycache[o + ic].c*e; 00857 00858 if (mmm2d_params.dielectric_contrast_on) { 00859 if(c==1 && this_node==0) { 00860 e_di_l = ( exp(omega*(-part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_bot + 00861 exp(omega*( part[i].r.p[2] - 2*h + layer_h)) )*fac_delta; 00862 00863 e = exp(omega*(-part[i].r.p[2]))*mmm2d_params.delta_mid_bot; 00864 00865 lclimgebot[POQESP] += part[i].p.q*scycache[o + ic].s*e; 00866 lclimgebot[POQECP] += part[i].p.q*scycache[o + ic].c*e; 00867 } 00868 else 00869 e_di_l = ( exp(omega*(-part[i].r.p[2] + layer_h)) + 00870 exp(omega*( part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_top )*fac_delta_mid_bot; 00871 00872 if(c==n_layers && this_node==n_nodes-1) { 00873 e_di_h = (exp(omega*( part[i].r.p[2] - 3*h + 2*layer_h))*mmm2d_params.delta_mid_top + 00874 exp(omega*(-part[i].r.p[2] - h + 2*layer_h)) )*fac_delta; 00875 00876 e = exp(omega*(part[i].r.p[2] - h + layer_h))*mmm2d_params.delta_mid_top; 00877 00878 lclimgetop[POQESM] += part[i].p.q*scycache[o + ic].s*e; 00879 lclimgetop[POQECM] += part[i].p.q*scycache[o + ic].c*e; 00880 } 00881 else 00882 e_di_h = ( exp(omega*( part[i].r.p[2] - h + 2*layer_h)) + 00883 exp(omega*(-part[i].r.p[2] - h + 2*layer_h))*mmm2d_params.delta_mid_bot )*fac_delta_mid_top; 00884 00885 lclimge[POQESP] += part[i].p.q*scycache[o + ic].s*e_di_l; 00886 lclimge[POQECP] += part[i].p.q*scycache[o + ic].c*e_di_l; 00887 lclimge[POQESM] += part[i].p.q*scycache[o + ic].s*e_di_h; 00888 lclimge[POQECM] += part[i].p.q*scycache[o + ic].c*e_di_h; 00889 } 00890 00891 add_vec(llclcblk, llclcblk, block(partblk, ic, size), size); 00892 ic++; 00893 } 00894 scale_vec(pref, blwentry(lclcblk, c, e_size), e_size); 00895 scale_vec(pref, abventry(lclcblk, c, e_size), e_size); 00896 00897 layer_top += layer_h; 00898 } 00899 00900 if (mmm2d_params.dielectric_contrast_on) { 00901 scale_vec(pref, lclimge, size); 00902 if(this_node==0) 00903 scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size); 00904 if(this_node==n_nodes-1) 00905 scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size); 00906 } 00907 } 00908 00909 static void add_P_force() 00910 { 00911 int np, c, i, ic; 00912 Particle *part; 00913 double *othcblk; 00914 int size = 4; 00915 00916 ic = 0; 00917 for (c = 1; c <= n_layers; c++) { 00918 np = cells[c].n; 00919 part = cells[c].part; 00920 othcblk = block(gblcblk, c - 1, size); 00921 00922 for (i = 0; i < np; i++) { 00923 part[i].f.f[0] += 00924 partblk[size*ic + POQESM]*othcblk[POQECP] - partblk[size*ic + POQECM]*othcblk[POQESP] + 00925 partblk[size*ic + POQESP]*othcblk[POQECM] - partblk[size*ic + POQECP]*othcblk[POQESM]; 00926 part[i].f.f[2] += 00927 partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] - 00928 partblk[size*ic + POQECP]*othcblk[POQECM] - partblk[size*ic + POQESP]*othcblk[POQESM]; 00929 00930 LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n", 00931 this_node, part[i].p.identity, part[i].f.f[0], 00932 part[i].f.f[1], part[i].f.f[2])); 00933 ic++; 00934 } 00935 } 00936 } 00937 00938 static double P_energy(double omega) 00939 { 00940 int np, c, i, ic; 00941 double *othcblk; 00942 int size = 4; 00943 double eng = 0; 00944 double pref = 1/omega; 00945 00946 ic = 0; 00947 for (c = 1; c <= n_layers; c++) { 00948 np = cells[c].n; 00949 othcblk = block(gblcblk, c - 1, size); 00950 for (i = 0; i < np; i++) { 00951 eng += pref*(partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] + 00952 partblk[size*ic + POQECP]*othcblk[POQECM] + partblk[size*ic + POQESP]*othcblk[POQESM]); 00953 ic++; 00954 } 00955 } 00956 return eng; 00957 } 00958 00959 static void add_Q_force() 00960 { 00961 int np, c, i, ic; 00962 Particle *part; 00963 double *othcblk; 00964 int size = 4; 00965 00966 ic = 0; 00967 for (c = 1; c <= n_layers; c++) { 00968 np = cells[c].n; 00969 part = cells[c].part; 00970 othcblk = block(gblcblk, c - 1, size); 00971 00972 for (i = 0; i < np; i++) { 00973 part[i].f.f[1] += 00974 partblk[size*ic + POQESM]*othcblk[POQECP] - partblk[size*ic + POQECM]*othcblk[POQESP] + 00975 partblk[size*ic + POQESP]*othcblk[POQECM] - partblk[size*ic + POQECP]*othcblk[POQESM]; 00976 part[i].f.f[2] += 00977 partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] - 00978 partblk[size*ic + POQECP]*othcblk[POQECM] - partblk[size*ic + POQESP]*othcblk[POQESM]; 00979 00980 LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n", 00981 this_node, part[i].p.identity, part[i].f.f[0], 00982 part[i].f.f[1], part[i].f.f[2])); 00983 ic++; 00984 } 00985 } 00986 } 00987 00988 static double Q_energy(double omega) 00989 { 00990 int np, c, i, ic; 00991 double *othcblk; 00992 int size = 4; 00993 double eng = 0; 00994 double pref = 1/omega; 00995 00996 ic = 0; 00997 for (c = 1; c <= n_layers; c++) { 00998 np = cells[c].n; 00999 othcblk = block(gblcblk, c - 1, size); 01000 01001 for (i = 0; i < np; i++) { 01002 eng += pref*(partblk[size*ic + POQECM]*othcblk[POQECP] + partblk[size*ic + POQESM]*othcblk[POQESP] + 01003 partblk[size*ic + POQECP]*othcblk[POQECM] + partblk[size*ic + POQESP]*othcblk[POQESM]); 01004 ic++; 01005 } 01006 } 01007 return eng; 01008 } 01009 01010 /*****************************************************************/ 01011 /* PQ particle blocks */ 01012 /*****************************************************************/ 01013 01014 /* compare setup_P */ 01015 static void setup_PQ(int p, int q, double omega, double fac) 01016 { 01017 int np, c, i, ic, ox = (p - 1)*n_localpart, oy = (q - 1)*n_localpart; 01018 Particle *part; 01019 double pref = coulomb.prefactor*8*M_PI*ux*uy*fac*fac; 01020 double h = box_l[2]; 01021 double fac_imgsum = 1/(1 - mmm2d_params.delta_mult*exp(-omega*2*h)); 01022 double fac_delta_mid_bot = mmm2d_params.delta_mid_bot*fac_imgsum; 01023 double fac_delta_mid_top = mmm2d_params.delta_mid_top*fac_imgsum; 01024 double fac_delta = mmm2d_params.delta_mult*fac_imgsum; 01025 double layer_top; 01026 double e, e_di_l, e_di_h; 01027 double *llclcblk; 01028 double *lclimgebot=NULL, *lclimgetop=NULL; 01029 int e_size = 4, size = 8; 01030 01031 if (mmm2d_params.dielectric_contrast_on) 01032 clear_vec(lclimge, size); 01033 01034 if(this_node==0) { 01035 lclimgebot = block(lclcblk, 0, size); 01036 clear_vec(blwentry(lclcblk, 0, e_size), e_size); 01037 } 01038 01039 if(this_node==n_nodes-1) { 01040 lclimgetop = block(lclcblk, n_layers + 1, size); 01041 clear_vec(abventry(lclcblk, n_layers + 1, e_size), e_size); 01042 } 01043 01044 layer_top = my_left[2] + layer_h; 01045 ic = 0; 01046 for (c = 1; c <= n_layers; c++) { 01047 np = cells[c].n; 01048 part = cells[c].part; 01049 llclcblk = block(lclcblk, c, size); 01050 01051 clear_vec(llclcblk, size); 01052 01053 for (i = 0; i < np; i++) { 01054 e = exp(omega*(part[i].r.p[2] - layer_top)); 01055 01056 partblk[size*ic + PQESSM] = scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q/e; 01057 partblk[size*ic + PQESCM] = scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q/e; 01058 partblk[size*ic + PQECSM] = scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q/e; 01059 partblk[size*ic + PQECCM] = scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q/e; 01060 01061 partblk[size*ic + PQESSP] = scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e; 01062 partblk[size*ic + PQESCP] = scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e; 01063 partblk[size*ic + PQECSP] = scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e; 01064 partblk[size*ic + PQECCP] = scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e; 01065 01066 if (mmm2d_params.dielectric_contrast_on) { 01067 if(c==1 && this_node==0) { 01068 e_di_l = (exp(omega*(-part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_bot + 01069 exp(omega*( part[i].r.p[2] - 2*h + layer_h)) )*fac_delta; 01070 01071 e = exp(omega*(-part[i].r.p[2]))*mmm2d_params.delta_mid_bot; 01072 01073 lclimgebot[PQESSP] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e; 01074 lclimgebot[PQESCP] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e; 01075 lclimgebot[PQECSP] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e; 01076 lclimgebot[PQECCP] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e; 01077 } 01078 else 01079 e_di_l = ( exp(omega*(-part[i].r.p[2] + layer_h)) + 01080 exp(omega*( part[i].r.p[2] - 2*h + layer_h))*mmm2d_params.delta_mid_top )*fac_delta_mid_bot; 01081 01082 if(c==n_layers && this_node==n_nodes-1) { 01083 e_di_h = ( exp(omega*( part[i].r.p[2] - 3*h + 2*layer_h))*mmm2d_params.delta_mid_top + 01084 exp(omega*(-part[i].r.p[2] - h + 2*layer_h)) )*fac_delta; 01085 01086 e = exp(omega*(part[i].r.p[2]-h+layer_h))*mmm2d_params.delta_mid_top; 01087 01088 lclimgetop[PQESSM] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e; 01089 lclimgetop[PQESCM] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e; 01090 lclimgetop[PQECSM] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e; 01091 lclimgetop[PQECCM] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e; 01092 } 01093 else 01094 e_di_h = ( exp(omega*( part[i].r.p[2] - h + 2*layer_h)) + 01095 exp(omega*(-part[i].r.p[2] - h + 2*layer_h))*mmm2d_params.delta_mid_bot )*fac_delta_mid_top; 01096 01097 lclimge[PQESSP] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e_di_l; 01098 lclimge[PQESCP] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e_di_l; 01099 lclimge[PQECSP] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e_di_l; 01100 lclimge[PQECCP] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e_di_l; 01101 01102 lclimge[PQESSM] += scxcache[ox + ic].s*scycache[oy + ic].s*part[i].p.q*e_di_h; 01103 lclimge[PQESCM] += scxcache[ox + ic].s*scycache[oy + ic].c*part[i].p.q*e_di_h; 01104 lclimge[PQECSM] += scxcache[ox + ic].c*scycache[oy + ic].s*part[i].p.q*e_di_h; 01105 lclimge[PQECCM] += scxcache[ox + ic].c*scycache[oy + ic].c*part[i].p.q*e_di_h; 01106 } 01107 01108 add_vec(llclcblk, llclcblk, block(partblk, ic, size), size); 01109 ic++; 01110 } 01111 scale_vec(pref, blwentry(lclcblk, c, e_size), e_size); 01112 scale_vec(pref, abventry(lclcblk, c, e_size), e_size); 01113 01114 layer_top += layer_h; 01115 } 01116 01117 if (mmm2d_params.dielectric_contrast_on) { 01118 scale_vec(pref, lclimge, size); 01119 01120 if(this_node==0) 01121 scale_vec(pref, blwentry(lclcblk, 0, e_size), e_size); 01122 if(this_node==n_nodes-1) 01123 scale_vec(pref, abventry(lclcblk, n_layers + 1, e_size), e_size); 01124 } 01125 } 01126 01127 static void add_PQ_force(int p, int q, double omega) 01128 { 01129 int np, c, i, ic; 01130 Particle *part; 01131 double pref_x = C_2PI*ux*p/omega; 01132 double pref_y = C_2PI*uy*q/omega; 01133 double *othcblk; 01134 int size = 8; 01135 01136 ic = 0; 01137 for (c = 1; c <= n_layers; c++) { 01138 np = cells[c].n; 01139 part = cells[c].part; 01140 othcblk = block(gblcblk, c - 1, size); 01141 01142 for (i = 0; i < np; i++) { 01143 part[i].f.f[0] += 01144 pref_x*(partblk[size*ic + PQESCM]*othcblk[PQECCP] + partblk[size*ic + PQESSM]*othcblk[PQECSP] - 01145 partblk[size*ic + PQECCM]*othcblk[PQESCP] - partblk[size*ic + PQECSM]*othcblk[PQESSP] + 01146 partblk[size*ic + PQESCP]*othcblk[PQECCM] + partblk[size*ic + PQESSP]*othcblk[PQECSM] - 01147 partblk[size*ic + PQECCP]*othcblk[PQESCM] - partblk[size*ic + PQECSP]*othcblk[PQESSM]); 01148 part[i].f.f[1] += 01149 pref_y*(partblk[size*ic + PQECSM]*othcblk[PQECCP] + partblk[size*ic + PQESSM]*othcblk[PQESCP] - 01150 partblk[size*ic + PQECCM]*othcblk[PQECSP] - partblk[size*ic + PQESCM]*othcblk[PQESSP] + 01151 partblk[size*ic + PQECSP]*othcblk[PQECCM] + partblk[size*ic + PQESSP]*othcblk[PQESCM] - 01152 partblk[size*ic + PQECCP]*othcblk[PQECSM] - partblk[size*ic + PQESCP]*othcblk[PQESSM]); 01153 part[i].f.f[2] += 01154 (partblk[size*ic + PQECCM]*othcblk[PQECCP] + partblk[size*ic + PQECSM]*othcblk[PQECSP] + 01155 partblk[size*ic + PQESCM]*othcblk[PQESCP] + partblk[size*ic + PQESSM]*othcblk[PQESSP] - 01156 partblk[size*ic + PQECCP]*othcblk[PQECCM] - partblk[size*ic + PQECSP]*othcblk[PQECSM] - 01157 partblk[size*ic + PQESCP]*othcblk[PQESCM] - partblk[size*ic + PQESSP]*othcblk[PQESSM]); 01158 01159 LOG_FORCES(fprintf(stderr, "%d: part %d force %10.3g %10.3g %10.3g\n", 01160 this_node, part[i].p.identity, part[i].f.f[0], 01161 part[i].f.f[1], part[i].f.f[2])); 01162 ic++; 01163 } 01164 } 01165 } 01166 01167 static double PQ_energy(double omega) 01168 { 01169 int size = 8; 01170 double eng = 0; 01171 double pref = 1/omega; 01172 01173 int ic = 0; 01174 for (int c = 1; c <= n_layers; c++) { 01175 int np = cells[c].n; 01176 double *othcblk = block(gblcblk, c - 1, size); 01177 01178 for (int i = 0; i < np; i++) { 01179 eng += pref*(partblk[size*ic + PQECCM]*othcblk[PQECCP] + partblk[size*ic + PQECSM]*othcblk[PQECSP] + 01180 partblk[size*ic + PQESCM]*othcblk[PQESCP] + partblk[size*ic + PQESSM]*othcblk[PQESSP] + 01181 partblk[size*ic + PQECCP]*othcblk[PQECCM] + partblk[size*ic + PQECSP]*othcblk[PQECSM] + 01182 partblk[size*ic + PQESCP]*othcblk[PQESCM] + partblk[size*ic + PQESSP]*othcblk[PQESSM]); 01183 ic++; 01184 } 01185 } 01186 return eng; 01187 } 01188 01189 /*****************************************************************/ 01190 /* main loops */ 01191 /*****************************************************************/ 01192 01193 static void add_force_contribution(int p, int q) 01194 { 01195 double omega, fac; 01196 01197 if (q == 0) { 01198 if (p == 0) { 01199 setup_z_force(); 01200 01201 if (mmm2d_params.dielectric_contrast_on) 01202 gather_image_contributions(1); 01203 else 01204 clear_image_contributions(1); 01205 01206 distribute(1, 1.); 01207 add_z_force(); 01208 checkpoint("************2piz", 0, 0, 1); 01209 } 01210 else { 01211 omega = C_2PI*ux*p; 01212 fac = exp(-omega*layer_h); 01213 setup_P(p, omega, fac); 01214 if (mmm2d_params.dielectric_contrast_on) 01215 gather_image_contributions(2); 01216 else 01217 clear_image_contributions(2); 01218 distribute(2, fac); 01219 add_P_force(); 01220 checkpoint("************distri p", p, 0, 2); 01221 } 01222 } 01223 else if (p == 0) { 01224 omega = C_2PI*uy*q; 01225 fac = exp(-omega*layer_h); 01226 setup_Q(q, omega, fac); 01227 if (mmm2d_params.dielectric_contrast_on) 01228 gather_image_contributions(2); 01229 else 01230 clear_image_contributions(2); 01231 distribute(2, fac); 01232 add_Q_force(); 01233 checkpoint("************distri q", 0, q, 2); 01234 } 01235 else { 01236 omega = C_2PI*sqrt(SQR(ux*p) + SQR(uy*q)); 01237 fac = exp(-omega*layer_h); 01238 setup_PQ(p, q, omega, fac); 01239 if (mmm2d_params.dielectric_contrast_on) 01240 gather_image_contributions(4); 01241 else 01242 clear_image_contributions(4); 01243 distribute(4, fac); 01244 add_PQ_force(p, q, omega); 01245 checkpoint("************distri pq", p, q, 4); 01246 } 01247 } 01248 01249 static double energy_contribution(int p, int q) 01250 { 01251 double eng; 01252 double omega, fac; 01253 01254 if (q == 0) { 01255 if (p == 0) { 01256 setup_z_energy(); 01257 clear_image_contributions(2); 01258 distribute(2, 1.); 01259 eng = z_energy(); 01260 checkpoint("E************2piz", 0, 0, 2); 01261 } 01262 else { 01263 omega = C_2PI*ux*p; 01264 fac = exp(-omega*layer_h); 01265 setup_P(p, omega, fac); 01266 if (mmm2d_params.dielectric_contrast_on) 01267 gather_image_contributions(2); 01268 else 01269 clear_image_contributions(2); 01270 distribute(2, fac); 01271 eng = P_energy(omega); 01272 checkpoint("************distri p", p, 0, 2); 01273 } 01274 } 01275 else if (p == 0) { 01276 omega = C_2PI*uy*q; 01277 fac = exp(-omega*layer_h); 01278 setup_Q(q, omega, fac); 01279 if (mmm2d_params.dielectric_contrast_on) 01280 gather_image_contributions(2); 01281 else 01282 clear_image_contributions(2); 01283 distribute(2, fac); 01284 eng = Q_energy(omega); 01285 checkpoint("************distri q", 0, q, 2); 01286 } 01287 else { 01288 omega = C_2PI*sqrt(SQR(ux*p) + SQR(uy*q)); 01289 fac = exp(-omega*layer_h); 01290 setup_PQ(p, q, omega, fac); 01291 if (mmm2d_params.dielectric_contrast_on) 01292 gather_image_contributions(4); 01293 else 01294 clear_image_contributions(4); 01295 distribute(4, fac); 01296 eng = PQ_energy(omega); 01297 checkpoint("************distri pq", p, q, 4); 01298 } 01299 return eng; 01300 } 01301 01302 double MMM2D_add_far(int f, int e) 01303 { 01304 double eng; 01305 int p, q; 01306 double R, dR, q2; 01307 int *undone; 01308 01309 // It's not really far... 01310 eng = e ? self_energy : 0; 01311 01312 if (mmm2d_params.far_cut == 0.0) 01313 return 0.5*eng; 01314 01315 undone = malloc((n_scxcache + 1)*sizeof(int)); 01316 01317 prepare_scx_cache(); 01318 prepare_scy_cache(); 01319 01320 /* complicated loop. We work through the p,q vectors in rings 01321 from outside to inside to avoid problems with cancellation */ 01322 01323 /* up to which q vector we have to work */ 01324 for (p = 0; p <= n_scxcache; p++) { 01325 if (p == 0) 01326 q = n_scycache; 01327 else { 01328 q2 = mmm2d_params.far_cut2 - SQR(ux*(p - 1)); 01329 if (q2 > 0) 01330 q = 1 + box_l[1]*(int)ceil(sqrt(q2)); 01331 else 01332 q = 1; 01333 /* just to be on the safe side... */ 01334 if (q > n_scycache) q = n_scycache; 01335 } 01336 undone[p] = q; 01337 } 01338 01339 dR = -log(FARRELPREC)/C_2PI*uz; 01340 01341 for(R = mmm2d_params.far_cut; R > 0; R -= dR) { 01342 for (p = n_scxcache; p >= 0; p--) { 01343 for (q = undone[p]; q >= 0; q--) { 01344 if (ux2*SQR(p) + uy2*SQR(q) < SQR(R)) 01345 break; 01346 if (f) 01347 add_force_contribution(p, q); 01348 if (e) 01349 eng += energy_contribution(p, q); 01350 } 01351 undone[p] = q; 01352 } 01353 } 01354 // printf("yyyy\n"); 01355 /* clean up left overs */ 01356 for (p = n_scxcache; p >= 0; p--) { 01357 q = undone[p]; 01358 // fprintf(stderr, "left over %d\n", q); 01359 for (; q >= 0; q--) { 01360 // printf("xxxxx %d %d\n", p, q); 01361 if (f) 01362 add_force_contribution(p, q); 01363 if (e) 01364 eng += energy_contribution(p, q); 01365 } 01366 } 01367 01368 free(undone); 01369 01370 return 0.5*eng; 01371 } 01372 01373 static int MMM2D_tune_far(double error) 01374 { 01375 double err; 01376 double min_inv_boxl = dmin(ux, uy); 01377 mmm2d_params.far_cut = min_inv_boxl; 01378 do { 01379 err = exp(-2*M_PI*mmm2d_params.far_cut*min_far)/min_far* 01380 (C_2PI*mmm2d_params.far_cut + 2*(ux + uy) + 1/min_far); 01381 // fprintf(stderr, "far tuning: %f -> %f, %f\n", mmm2d_params.far_cut, err, min_far); 01382 mmm2d_params.far_cut += min_inv_boxl; 01383 } 01384 while (err > error && mmm2d_params.far_cut*layer_h < MAXIMAL_FAR_CUT); 01385 if (mmm2d_params.far_cut*layer_h >= MAXIMAL_FAR_CUT) 01386 return ERROR_FARC; 01387 // fprintf(stderr, "far cutoff %g %g %g\n", mmm2d_params.far_cut, err, min_far); 01388 mmm2d_params.far_cut -= min_inv_boxl; 01389 mmm2d_params.far_cut2 = SQR(mmm2d_params.far_cut); 01390 return 0; 01391 } 01392 01393 /**************************************** 01394 * NEAR FORMULA 01395 ****************************************/ 01396 01397 static int MMM2D_tune_near(double error) 01398 { 01399 int P, n, i; 01400 double uxrho2m2max, uxrhomax2; 01401 int p; 01402 double T, pref, err, exponent; 01403 double L, sum; 01404 01405 /* yes, it's y only... */ 01406 if (max_near > box_l[1]/2) 01407 return ERROR_LARGE; 01408 if (min_far < 0) 01409 return ERROR_SMALL; 01410 if (ux*box_l[1] >= 3/M_SQRT2 ) 01411 return ERROR_BOXL; 01412 01413 /* error is split into three parts: 01414 one part for bessel, one for complex 01415 and one for polygamma cutoff */ 01416 part_error = error/3; 01417 01418 /* Bessel sum, determine cutoff */ 01419 P = 2; 01420 exponent = M_PI*ux*box_l[1]; 01421 T = exp(exponent)/exponent; 01422 pref = 8*ux*dmax(C_2PI*ux, 1); 01423 do { 01424 L = M_PI*ux*(P - 1); 01425 sum = 0; 01426 for (p = 1; p <= P; p++) 01427 sum += p*exp(-exponent*p); 01428 err = pref*K1(box_l[1]*L)*(T*((L + uy)/M_PI*box_l[0] - 1) + sum); 01429 P++; 01430 } 01431 while (err > part_error && (P - 1) < MAXIMAL_B_CUT); 01432 P--; 01433 if (P == MAXIMAL_B_CUT) 01434 return ERROR_BESSEL; 01435 01436 // fprintf(stderr, "bessel cutoff %d %g\n", P, err); 01437 01438 realloc_intlist(&besselCutoff, besselCutoff.n = P); 01439 for (p = 1; p < P; p++) 01440 besselCutoff.e[p-1] = (int)floor(((double)P)/(2*p)) + 1; 01441 01442 /* complex sum, determine cutoffs (dist dependent) */ 01443 T = log(part_error/(16*M_SQRT2)*box_l[0]*box_l[1]); 01444 // for 0, the sum is exactly zero, so do not calculate anything 01445 complexCutoff[0] = 0; 01446 for (i = 1; i <= COMPLEX_STEP; i++) 01447 complexCutoff[i] = (int)ceil(T/log(i/COMPLEX_FAC)); 01448 prepareBernoulliNumbers(complexCutoff[COMPLEX_STEP]); 01449 01450 /* polygamma, determine order */ 01451 n = 1; 01452 uxrhomax2 = SQR(ux*box_l[1])/2; 01453 uxrho2m2max = 1.0; 01454 do { 01455 create_mod_psi_up_to(n+1); 01456 01457 err = 2*n*fabs(mod_psi_even(n, 0.5))*uxrho2m2max; 01458 uxrho2m2max *= uxrhomax2; 01459 n++; 01460 } 01461 while (err > 0.1*part_error && n < MAXIMAL_POLYGAMMA); 01462 if (n == MAXIMAL_POLYGAMMA) 01463 return ERROR_POLY; 01464 // fprintf(stderr, "polygamma cutoff %d %g\n", n, err); 01465 01466 return 0; 01467 } 01468 01469 static void prepareBernoulliNumbers(int bon_order) 01470 { 01471 int l; 01472 /* BernoulliB[2 n]/(2 n)!(2 Pi)^(2n) up to order 33 */ 01473 static double bon_table[34] = { 01474 1.0000000000000000000, 3.2898681336964528729, 01475 -2.1646464674222763830, 2.0346861239688982794, 01476 -2.0081547123958886788, 2.0019891502556361707, 01477 -2.0004921731066160966, 2.0001224962701174097, 01478 -2.0000305645188173037, 2.0000076345865299997, 01479 -2.0000019079240677456, 2.0000004769010054555, 01480 -2.0000001192163781025, 2.0000000298031096567, 01481 -2.0000000074506680496, 2.0000000018626548648, 01482 -2.0000000004656623667, 2.0000000001164154418, 01483 -2.0000000000291038438, 2.0000000000072759591, 01484 -2.0000000000018189896, 2.0000000000004547474, 01485 -2.0000000000001136868, 2.0000000000000284217, 01486 -2.0000000000000071054, 2.0000000000000017764, 01487 -2.0000000000000004441, 2.0000000000000001110, 01488 -2.0000000000000000278, 2.0000000000000000069, 01489 -2.0000000000000000017, 2.0000000000000000004, 01490 -2.0000000000000000001, 2.0000000000000000000 01491 }; 01492 01493 if (bon_order < 2) 01494 bon_order = 2; 01495 01496 realloc_doublelist(&bon, bon.n = bon_order); 01497 01498 /* the ux is multiplied in to bessel, complex and psi at once, not here, 01499 and we use uy*(z + iy), so the uy is also treated below */ 01500 for(l = 1; (l <= bon_order) && (l < 34); l++) 01501 bon.e[l-1] = 2*uy*bon_table[l]; 01502 01503 for (; l <= bon_order; l++) { 01504 if (l & 1) 01505 bon.e[l-1] = 4.0*uy; 01506 else 01507 bon.e[l-1] = -4.0*uy; 01508 } 01509 } 01510 01511 void add_mmm2d_coulomb_pair_force(double charge_factor, 01512 double d[3], double dl2, double dl, double force[3]) 01513 { 01514 double F[3]; 01515 double pref = coulomb.prefactor*charge_factor; 01516 double z2 = d[2]*d[2]; 01517 double rho2 = d[1]*d[1] + z2; 01518 int i; 01519 01520 #ifdef ADDITIONAL_CHECKS 01521 if (d[2] >box_l[1]/2) { 01522 char *errtxt = runtime_error(128); 01523 ERROR_SPRINTF(errtxt, "{024 near formula called for too distant particle pair} "); 01524 return; 01525 } 01526 #endif 01527 01528 F[0] = F[1] = F[2] = 0; 01529 01530 /* Bessel sum */ 01531 { 01532 int p, l; 01533 double k0, k1; 01534 double k0Sum, k1ySum, k1Sum; 01535 double freq; 01536 double rho_l, ypl; 01537 double c, s; 01538 01539 for (p = 1; p < besselCutoff.n; p++) { 01540 k0Sum = 0; 01541 k1ySum = 0; 01542 k1Sum = 0; 01543 01544 freq = C_2PI*ux*p; 01545 01546 for (l = 1; l < besselCutoff.e[p-1]; l++) { 01547 ypl = d[1] + l*box_l[1]; 01548 rho_l = sqrt(ypl*ypl + z2); 01549 #ifdef BESSEL_MACHINE_PREC 01550 k0 = K0(freq*rho_l); 01551 k1 = K1(freq*rho_l); 01552 #else 01553 LPK01(freq*rho_l, &k0, &k1); 01554 #endif 01555 k1 /= rho_l; 01556 k0Sum += k0; 01557 k1Sum += k1; 01558 k1ySum += k1*ypl; 01559 01560 ypl = d[1] - l*box_l[1]; 01561 rho_l = sqrt(ypl*ypl + z2); 01562 #ifdef BESSEL_MACHINE_PREC 01563 k0 = K0(freq*rho_l); 01564 k1 = K1(freq*rho_l); 01565 #else 01566 LPK01(freq*rho_l, &k0, &k1); 01567 #endif 01568 k1 /= rho_l; 01569 k0Sum += k0; 01570 k1Sum += k1; 01571 k1ySum += k1*ypl; 01572 } 01573 01574 /* the ux is multiplied in to bessel, complex and psi at once, not here */ 01575 c = 4*freq*cos(freq*d[0]); 01576 s = 4*freq*sin(freq*d[0]); 01577 F[0] += s*k0Sum; 01578 F[1] += c*k1ySum; 01579 F[2] += d[2]*c*k1Sum; 01580 } 01581 // fprintf(stderr, " bessel force %f %f %f\n", F[0], F[1], F[2]); 01582 } 01583 01584 /* complex sum */ 01585 { 01586 double zeta_r, zeta_i; 01587 double zet2_r, zet2_i; 01588 double ztn_r, ztn_i; 01589 double tmp_r; 01590 int end, n; 01591 01592 ztn_r = zeta_r = uy*d[2]; 01593 ztn_i = zeta_i = uy*d[1]; 01594 zet2_r = zeta_r*zeta_r - zeta_i*zeta_i; 01595 zet2_i = 2*zeta_r*zeta_i; 01596 01597 end = (int)ceil(COMPLEX_FAC*uy2*rho2); 01598 if (end > COMPLEX_STEP) { 01599 end = COMPLEX_STEP; 01600 fprintf(stderr, "MMM2D: some particles left the assumed slab, precision might be lost\n"); 01601 } 01602 if (end < 0) { 01603 char *errtxt = runtime_error(100); 01604 ERROR_SPRINTF(errtxt, "{MMM2D: distance was negative, coordinates probably out of range } "); 01605 end = 0; 01606 } 01607 end = complexCutoff[end]; 01608 01609 for (n = 0; n < end; n++) { 01610 F[1] -= bon.e[n]*ztn_i; 01611 F[2] += bon.e[n]*ztn_r; 01612 01613 tmp_r = ztn_r*zet2_r - ztn_i*zet2_i; 01614 ztn_i = ztn_r*zet2_i + ztn_i*zet2_r; 01615 ztn_r = tmp_r; 01616 } 01617 // fprintf(stderr, "complex force %f %f %f %d\n", F[0], F[1], F[2], end); 01618 } 01619 01620 /* psi sum */ 01621 { 01622 int n; 01623 double uxx = ux*d[0]; 01624 double uxrho2 = ux2*rho2; 01625 double uxrho_2n, uxrho_2nm2; /* rho^{2n-2} */ 01626 double mpe, mpo; 01627 01628 /* n = 0 inflicts only Fx and pot */ 01629 /* one ux is multiplied in to bessel, complex and psi at once, not here */ 01630 F[0] += ux*mod_psi_odd(0, uxx); 01631 01632 uxrho_2nm2 = 1.0; 01633 for (n = 1;n < n_modPsi; n++) { 01634 mpe = mod_psi_even(n, uxx); 01635 mpo = mod_psi_odd(n, uxx); 01636 uxrho_2n = uxrho_2nm2*uxrho2; 01637 01638 F[0] += ux *uxrho_2n *mpo; 01639 F[1] += 2*n*ux2*uxrho_2nm2*mpe*d[1]; 01640 F[2] += 2*n*ux2*uxrho_2nm2*mpe*d[2]; 01641 01642 /* y < rho => ux2*uxrho_2nm2*d[1] < ux*uxrho_2n */ 01643 if (fabs(2*n*ux*uxrho_2n*mpe) < part_error) 01644 break; 01645 01646 uxrho_2nm2 = uxrho_2n; 01647 } 01648 // fprintf(stderr, " psi force %f %f %f %d\n", F[0], F[1], F[2], n); 01649 } 01650 01651 01652 for (i = 0; i < 3; i++) 01653 F[i] *= ux; 01654 01655 /* explicitly added potentials r_{-1,0} and r_{1,0} */ 01656 { 01657 double cx = d[0] + box_l[0]; 01658 double rinv2 = 1.0/(cx*cx + rho2), rinv = sqrt(rinv2); 01659 double rinv3 = rinv*rinv2; 01660 F[0] += cx*rinv3; 01661 F[1] += d[1]*rinv3; 01662 F[2] += d[2]*rinv3; 01663 01664 cx = d[0] - box_l[0]; 01665 rinv2 = 1.0/(cx*cx + rho2); rinv = sqrt(rinv2); 01666 rinv3 = rinv*rinv2; 01667 F[0] += cx*rinv3; 01668 F[1] += d[1]*rinv3; 01669 F[2] += d[2]*rinv3; 01670 01671 rinv3 = 1/(dl2*dl); 01672 F[0] += d[0]*rinv3; 01673 F[1] += d[1]*rinv3; 01674 F[2] += d[2]*rinv3; 01675 01676 // fprintf(stderr, "explcit force %f %f %f\n", F[0], F[1], F[2]); 01677 } 01678 01679 for (i = 0; i < 3; i++) 01680 force[i] += pref*F[i]; 01681 } 01682 01683 MDINLINE double calc_mmm2d_copy_pair_energy(double d[3]) 01684 { 01685 double eng; 01686 double z2 = d[2]*d[2]; 01687 double rho2 = d[1]*d[1] + z2; 01688 01689 /* the ux is multiplied in below */ 01690 eng = -2*log(4*M_PI*uy*box_l[0]); 01691 01692 /* Bessel sum */ 01693 { 01694 int p, l; 01695 double k0Sum; 01696 double freq; 01697 double rho_l, ypl; 01698 double c; 01699 01700 for (p = 1; p < besselCutoff.n; p++) { 01701 k0Sum = 0; 01702 01703 freq = C_2PI*ux*p; 01704 01705 for (l = 1; l < besselCutoff.e[p-1]; l++) { 01706 ypl = d[1] + l*box_l[1]; 01707 rho_l = sqrt(ypl*ypl + z2); 01708 k0Sum += K0(freq*rho_l); 01709 01710 ypl = d[1] - l*box_l[1]; 01711 rho_l = sqrt(ypl*ypl + z2); 01712 k0Sum += K0(freq*rho_l); 01713 } 01714 01715 /* the ux is multiplied in to bessel, complex and psi at once, not here */ 01716 c = 4*cos(freq*d[0]); 01717 eng += c*k0Sum; 01718 } 01719 // fprintf(stderr, " bessel energy %f\n", eng); 01720 } 01721 01722 /* complex sum */ 01723 { 01724 double zeta_r, zeta_i; 01725 double zet2_r, zet2_i; 01726 double ztn_r, ztn_i; 01727 double tmp_r; 01728 int end, n; 01729 01730 ztn_r = zeta_r = uy*d[2]; 01731 ztn_i = zeta_i = uy*d[1]; 01732 01733 zet2_r = zeta_r*zeta_r - zeta_i*zeta_i; 01734 zet2_i = 2*zeta_r*zeta_i; 01735 01736 ztn_r = zet2_r; 01737 ztn_i = zet2_i; 01738 01739 end = (int)ceil(COMPLEX_FAC*uy2*rho2); 01740 if (end > COMPLEX_STEP) { 01741 end = COMPLEX_STEP; 01742 fprintf(stderr, "MMM2D: some particles left the assumed slab, precision might be lost\n"); 01743 } 01744 end = complexCutoff[end]; 01745 for (n = 1; n <= end; n++) { 01746 eng -= box_l[1]/(2*n)*bon.e[n-1]*ztn_r; 01747 01748 tmp_r = ztn_r*zet2_r - ztn_i*zet2_i; 01749 ztn_i = ztn_r*zet2_i + ztn_i*zet2_r; 01750 ztn_r = tmp_r; 01751 } 01752 // fprintf(stderr, "complex energy %f %d\n", eng, end); 01753 } 01754 01755 /* psi sum */ 01756 { 01757 int n; 01758 double add; 01759 double uxx = ux*d[0]; 01760 double uxrho2 = ux2*rho2; 01761 double uxrho_2n; 01762 01763 /* n = 0 inflicts only Fx and pot */ 01764 /* one ux is multiplied in to bessel, complex and psi at once, not here */ 01765 eng -= mod_psi_even(0, uxx); 01766 01767 uxrho_2n = uxrho2; 01768 for (n = 1; n < n_modPsi; n++) { 01769 add = uxrho_2n*mod_psi_even(n, uxx); 01770 eng -= add; 01771 if (fabs(add) < part_error) 01772 break; 01773 uxrho_2n *= uxrho2; 01774 } 01775 // fprintf(stderr, " psi energy %f %d\n", eng, n); 01776 } 01777 01778 eng *= ux; 01779 01780 /* explicitly added potentials r_{-1,0} and r_{1,0} */ 01781 { 01782 double cx = d[0] + box_l[0]; 01783 double rinv = sqrt(1.0/(cx*cx + rho2)); 01784 eng += rinv; 01785 01786 cx = d[0] - box_l[0]; 01787 rinv = sqrt(1.0/(cx*cx + rho2)); 01788 eng += rinv; 01789 01790 // fprintf(stderr, "explcit energy %f %f %f %f\n", d[0], d[1], d[2], eng); 01791 } 01792 01793 return eng; 01794 } 01795 01796 double mmm2d_coulomb_pair_energy(double charge_factor, 01797 double dv[3], double d2, double d) 01798 { 01799 double eng, pref = coulomb.prefactor*charge_factor; 01800 if (pref != 0.0) { 01801 eng = calc_mmm2d_copy_pair_energy(dv); 01802 return pref*(eng + 1/d); 01803 } 01804 return 0.0; 01805 } 01806 01807 void MMM2D_self_energy() 01808 { 01809 int c, np, i; 01810 Particle *part; 01811 double dv[3] = {0, 0, 0}; 01812 double seng = coulomb.prefactor*calc_mmm2d_copy_pair_energy(dv); 01813 self_energy = 0; 01814 01815 /* this one gives twice the real self energy, as it is used 01816 in the far formula which counts everything twice and in 01817 the end divides by two*/ 01818 01819 // fprintf(stderr, "%d: self energy %g\n", this_node, seng); 01820 01821 for (c = 0; c < local_cells.n; c++) { 01822 np = local_cells.cell[c]->n; 01823 part = local_cells.cell[c]->part; 01824 01825 for (i = 0; i < np; i++) { 01826 self_energy += seng*SQR(part[i].p.q); 01827 } 01828 } 01829 } 01830 01831 /**************************************** 01832 * COMMON PARTS 01833 ****************************************/ 01834 01835 int MMM2D_set_params(double maxPWerror, double far_cut, double delta_top, double delta_bot) 01836 { 01837 int err; 01838 01839 if (cell_structure.type != CELL_STRUCTURE_NSQUARE && 01840 cell_structure.type != CELL_STRUCTURE_LAYERED) { 01841 return ES_ERROR; 01842 } 01843 01844 mmm2d_params.maxPWerror = maxPWerror; 01845 01846 if (delta_top != 0.0 || delta_bot != 0.0) { 01847 mmm2d_params.dielectric_contrast_on = 1; 01848 mmm2d_params.delta_mid_top = delta_top; 01849 mmm2d_params.delta_mid_bot = delta_bot; 01850 mmm2d_params.delta_mult = delta_top*delta_bot; 01851 } 01852 else { 01853 mmm2d_params.dielectric_contrast_on = 0; 01854 mmm2d_params.delta_mid_top = 0; 01855 mmm2d_params.delta_mid_bot = 0; 01856 mmm2d_params.delta_mult = 0; 01857 } 01858 01859 MMM2D_setup_constants(); 01860 01861 if ((err = MMM2D_tune_near(maxPWerror))) 01862 return err; 01863 01864 /* if we cannot do the far formula, force off */ 01865 if (cell_structure.type == CELL_STRUCTURE_NSQUARE || 01866 (cell_structure.type == CELL_STRUCTURE_LAYERED && n_nodes*n_layers < 3)) { 01867 mmm2d_params.far_cut = 0.0; 01868 if (mmm2d_params.dielectric_contrast_on) { 01869 return ERROR_ICL; 01870 } 01871 } 01872 else { 01873 mmm2d_params.far_cut = far_cut; 01874 mmm2d_params.far_cut2 = SQR(far_cut); 01875 if (mmm2d_params.far_cut > 0) 01876 mmm2d_params.far_calculated = 0; 01877 else { 01878 if ((err = MMM2D_tune_far(maxPWerror))) 01879 return err; 01880 mmm2d_params.far_calculated = 1; 01881 } 01882 } 01883 01884 coulomb.method = COULOMB_MMM2D; 01885 01886 mpi_bcast_coulomb_params(); 01887 01888 return ES_OK; 01889 } 01890 01891 int MMM2D_sanity_checks() 01892 { 01893 char *errtxt; 01894 01895 if (!PERIODIC(0) || !PERIODIC(1) || PERIODIC(2)) { 01896 errtxt = runtime_error(128); 01897 ERROR_SPRINTF(errtxt, "{025 MMM2D requires periodicity 1 1 0} "); 01898 return 1; 01899 } 01900 01901 if (cell_structure.type != CELL_STRUCTURE_LAYERED && 01902 cell_structure.type != CELL_STRUCTURE_NSQUARE) { 01903 errtxt = runtime_error(128); 01904 ERROR_SPRINTF(errtxt, "{026 MMM2D at present requires layered (or n-square) cellsystem} "); 01905 return 1; 01906 } 01907 return 0; 01908 } 01909 01910 void MMM2D_init() 01911 { 01912 int err; 01913 char *errtxt; 01914 01915 if (MMM2D_sanity_checks()) return; 01916 01917 MMM2D_setup_constants(); 01918 if ((err = MMM2D_tune_near(mmm2d_params.maxPWerror))) { 01919 errtxt = runtime_error(128); 01920 ERROR_SPRINTF(errtxt, "{027 MMM2D auto-retuning: %s} ", mmm2d_errors[err]); 01921 coulomb.method = COULOMB_NONE; 01922 return; 01923 } 01924 if (cell_structure.type == CELL_STRUCTURE_NSQUARE || 01925 (cell_structure.type == CELL_STRUCTURE_LAYERED && n_nodes*n_layers < 3)) { 01926 mmm2d_params.far_cut = 0.0; 01927 if (mmm2d_params.dielectric_contrast_on) { 01928 errtxt = runtime_error(128); 01929 ERROR_SPRINTF(errtxt, "{027 MMM2D auto-retuning: IC requires layered cellsystem with > 3 layers} "); 01930 } 01931 } 01932 else { 01933 if (mmm2d_params.far_calculated) { 01934 if ((err = MMM2D_tune_far(mmm2d_params.maxPWerror))) { 01935 errtxt = runtime_error(128); 01936 ERROR_SPRINTF(errtxt, "{028 MMM2D auto-retuning: %s} ", mmm2d_errors[err]); 01937 coulomb.method = COULOMB_NONE; 01938 return; 01939 } 01940 } 01941 } 01942 } 01943 01944 void MMM2D_on_resort_particles() 01945 { 01946 /* if we need MMM2D far formula, allocate caches */ 01947 if (cell_structure.type == CELL_STRUCTURE_LAYERED) { 01948 n_localpart = cells_get_n_particles(); 01949 n_scxcache = (int)(ceil(mmm2d_params.far_cut/ux) + 1); 01950 n_scycache = (int)(ceil(mmm2d_params.far_cut/uy) + 1); 01951 scxcache = realloc(scxcache, n_scxcache*n_localpart*sizeof(SCCache)); 01952 scycache = realloc(scycache, n_scycache*n_localpart*sizeof(SCCache)); 01953 01954 partblk = realloc(partblk, n_localpart*8*sizeof(double)); 01955 lclcblk = realloc(lclcblk, n_cells*8*sizeof(double)); 01956 gblcblk = realloc(gblcblk, n_layers*8*sizeof(double)); 01957 } 01958 MMM2D_self_energy(); 01959 } 01960 01961 01962 void MMM2D_dielectric_layers_force_contribution() 01963 { 01964 int c, i, j; 01965 Cell *celll; 01966 int npl; 01967 Particle *pl, *p1; 01968 double dist2, d[3]; 01969 double charge_factor; 01970 double a[3]; 01971 double force[3]={0, 0, 0}; 01972 01973 if (!mmm2d_params.dielectric_contrast_on) return; 01974 01975 if(this_node==0) { 01976 c=1; 01977 celll = &cells[c]; 01978 pl = celll->part; 01979 npl = celll->n; 01980 01981 for(i = 0; i < npl; i++) { 01982 // printf("enter mmm2d_dielectric %f \n",my_left[2]); 01983 force[0]=0;force[1]=0;force[2]=0; 01984 p1 = &pl[i]; 01985 for(j = 0; j < npl; j++) { 01986 a[0]=pl[j].r.p[0]; a[1]=pl[j].r.p[1];a[2]=-pl[j].r.p[2]; 01987 layered_get_mi_vector(d, p1->r.p, a); 01988 dist2 = sqrlen(d); 01989 charge_factor=p1->p.q*pl[j].p.q*mmm2d_params.delta_mid_bot; 01990 add_mmm2d_coulomb_pair_force(charge_factor, d, sqrt(dist2), dist2, force); 01991 } 01992 for (j = 0; j < 3; j++) { 01993 p1->f.f[j] += force[j]; 01994 } 01995 } 01996 } 01997 01998 if(this_node==n_nodes-1) { 01999 02000 c=n_layers; 02001 celll = &cells[c]; 02002 pl = celll->part; 02003 npl = celll->n; 02004 for(i = 0; i < npl; i++) { 02005 force[0]=0;force[1]=0;force[2]=0; 02006 p1 = &pl[i]; 02007 for(j = 0; j < npl; j++) { 02008 a[0]=pl[j].r.p[0]; a[1]=pl[j].r.p[1]; a[2]=2*box_l[2]-pl[j].r.p[2]; 02009 layered_get_mi_vector(d, p1->r.p, a); 02010 dist2 = sqrlen(d); 02011 charge_factor=p1->p.q*pl[j].p.q*mmm2d_params.delta_mid_top; 02012 add_mmm2d_coulomb_pair_force(charge_factor, d, sqrt(dist2), dist2, force); 02013 } 02014 for (j = 0; j < 3; j++) { 02015 p1->f.f[j] += force[j]; 02016 } 02017 } 02018 } 02019 02020 } 02021 02022 double MMM2D_dielectric_layers_energy_contribution() 02023 { 02024 int c, i, j; 02025 Cell *celll; 02026 int npl; 02027 Particle *pl, *p1; 02028 double dist2, d[3]; 02029 double charge_factor; 02030 double a[3]; 02031 double eng=0.0; 02032 // prefactor for the charged plate interaction removal 02033 double corr_pref = coulomb.prefactor*C_2PI*ux*uy; 02034 02035 if (!mmm2d_params.dielectric_contrast_on) return 0.0; 02036 02037 if(this_node==0) { 02038 c=1; 02039 celll = &cells[c]; 02040 pl = celll->part; 02041 npl = celll->n; 02042 02043 for(i = 0; i < npl; i++) { 02044 p1 = &pl[i]; 02045 for(j = 0; j < npl; j++) { 02046 a[0]=pl[j].r.p[0]; a[1]=pl[j].r.p[1];a[2]=-pl[j].r.p[2]; 02047 layered_get_mi_vector(d, p1->r.p, a); 02048 dist2 = sqrlen(d); 02049 charge_factor = mmm2d_params.delta_mid_bot*p1->p.q*pl[j].p.q; 02050 eng+=mmm2d_coulomb_pair_energy(charge_factor, d, dist2, sqrt(dist2)) + corr_pref*charge_factor*d[2]; 02051 } 02052 } 02053 } 02054 02055 if(this_node==n_nodes-1) { 02056 c=n_layers; 02057 celll = &cells[c]; 02058 pl = celll->part; 02059 npl = celll->n; 02060 for(i = 0; i < npl; i++) { 02061 p1 = &pl[i]; 02062 for(j = 0; j < npl; j++) { 02063 a[0]=pl[j].r.p[0]; a[1]=pl[j].r.p[1]; a[2]=2*box_l[2]-pl[j].r.p[2]; 02064 layered_get_mi_vector(d, p1->r.p, a); 02065 dist2 = sqrlen(d); 02066 charge_factor=mmm2d_params.delta_mid_top*p1->p.q*pl[j].p.q; 02067 eng+=mmm2d_coulomb_pair_energy(charge_factor, d, dist2, sqrt(dist2)) - corr_pref*charge_factor*d[2]; 02068 } 02069 } 02070 } 02071 return 0.5*eng; 02072 } 02073 02074 #endif
1.7.5.1