![]() |
ESPResSo 3.2.0-167-g2c9ead1-git
Extensible Simulation Package for Soft Matter Research
|
00001 /* 00002 Copyright (C) 2010,2011,2012,2013 The ESPResSo project 00003 Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 00004 Max-Planck-Institute for Polymer Research, Theory Group 00005 00006 This file is part of ESPResSo. 00007 00008 ESPResSo is free software: you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation, either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 ESPResSo is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with this program. If not, see <http://www.gnu.org/licenses/>. 00020 */ 00021 /** \file p3m-dipolar.c P3M algorithm for long range magnetic dipole-dipole interaction. 00022 * 00023 NB: In general the magnetic dipole-dipole functions bear the same 00024 name than the charge-charge but, adding in front of the name a D 00025 and replacing where "charge" appears by "dipole". In this way one 00026 can recognize the similarity of the functions but avoiding nasty 00027 confusions in their use. 00028 00029 PS: By default the magnetic epsilon is metallic = 0. 00030 */ 00031 00032 #include "p3m-dipolar.h" 00033 00034 #include <mpi.h> 00035 #include <stdio.h> 00036 #include <stdlib.h> 00037 #include <string.h> 00038 00039 #include "utils.h" 00040 #include "integrate.h" 00041 #include "global.h" 00042 #include "grid.h" 00043 #include "domain_decomposition.h" 00044 #include "particle_data.h" 00045 #include "communication.h" 00046 #include "fft-dipolar.h" 00047 #include "thermostat.h" 00048 #include "cells.h" 00049 #include "tuning.h" 00050 00051 #ifdef DP3M 00052 00053 /************************************************ 00054 * DEFINES 00055 ************************************************/ 00056 00057 /* MPI tags for the charge-charge p3m communications: */ 00058 /** Tag for communication in P3M_init() -> send_calc_mesh(). */ 00059 #define REQ_P3M_INIT_D 2001 00060 /** Tag for communication in p3m_gather_fft_grid(). */ 00061 #define REQ_P3M_GATHER_D 2011 00062 /** Tag for communication in p3m_spread_force_grid(). */ 00063 #define REQ_P3M_SPREAD_D 2021 00064 00065 /************************************************ 00066 * variables 00067 ************************************************/ 00068 00069 dp3m_data_struct dp3m; 00070 00071 /** \name Private Functions */ 00072 /************************************************************/ 00073 /*@{*/ 00074 00075 00076 /** Calculates for magnetic dipoles the properties of the send/recv sub-meshes of the local FFT mesh. 00077 * In order to calculate the recv sub-meshes there is a communication of 00078 * the margins between neighbouring nodes. */ 00079 static void dp3m_calc_send_mesh(); 00080 00081 /** Initializes for magnetic dipoles the (inverse) mesh constant \ref 00082 p3m_parameter_struct::a (\ref p3m_parameter_struct::ai) and the cutoff for charge 00083 assignment \ref p3m_parameter_struct::cao_cut, which has to be done by \ref 00084 dp3m_init once and by \ref dp3m_scaleby_box_l 00085 whenever the \ref box_l changed. */ 00086 static void dp3m_init_a_ai_cao_cut(); 00087 00088 00089 /** Calculate for magnetic dipoles the spacial position of the left down mesh point of the local mesh, to be 00090 stored in \ref p3m_local_mesh::ld_pos; function called by \ref dp3m_calc_local_ca_mesh once 00091 and by \ref dp3m_scaleby_box_l whenever the \ref box_l changed. */ 00092 static void dp3m_calc_lm_ld_pos(); 00093 00094 00095 /** Gather FFT grid. 00096 * After the charge assignment Each node needs to gather the 00097 * information for the FFT grid in his spatial domain. 00098 */ 00099 static void dp3m_gather_fft_grid(double* mesh); 00100 00101 /** Spread force grid. 00102 * After the k-space calculations each node needs to get all force 00103 * information to reassigne the forces from the grid to the 00104 * particles. 00105 */ 00106 static void dp3m_spread_force_grid(double* mesh); 00107 00108 /** realloc charge assignment fields. */ 00109 static void dp3m_realloc_ca_fields(int newsize); 00110 00111 00112 /** Initializes the (inverse) mesh constant \ref p3m_parameter_struct::a (\ref 00113 p3m_parameter_struct::ai) and the cutoff for charge assignment \ref 00114 p3m_parameter_struct::cao_cut, which has to be done by \ref dp3m_init 00115 once and by \ref dp3m_scaleby_box_l whenever the \ref box_l 00116 changed. */ 00117 static void dp3m_init_a_ai_cao_cut(); 00118 00119 00120 /** checks for correctness for magnetic dipoles in P3M of the cao_cut, necessary when the box length changes */ 00121 static int dp3m_sanity_checks_boxl(void); 00122 00123 00124 /** Calculate the spacial position of the left down mesh point of the local mesh, to be 00125 stored in \ref p3m_local_mesh::ld_pos; function called by \ref dp3m_calc_local_ca_mesh once 00126 and by \ref dp3m_scaleby_box_l whenever the \ref box_l changed. */ 00127 static void dp3m_calc_lm_ld_pos(); 00128 00129 00130 /** Calculates properties of the local FFT mesh for the 00131 charge assignment process. */ 00132 static void dp3m_calc_local_ca_mesh(); 00133 00134 /** Interpolates the P-th order charge assignment function from 00135 * Hockney/Eastwood 5-189 (or 8-61). The following charge fractions 00136 * are also tabulated in Deserno/Holm. */ 00137 static void dp3m_interpolate_dipole_assignment_function(); 00138 00139 /** shifts the mesh points by mesh/2 */ 00140 static void dp3m_calc_meshift(); 00141 00142 /** Calculates the Fourier transformed differential operator. 00143 * Remark: This is done on the level of n-vectors and not k-vectors, 00144 * i.e. the prefactor i*2*PI/L is missing! */ 00145 static void dp3m_calc_differential_operator(); 00146 00147 /** Calculates the influence function optimized for the dipolar forces. */ 00148 static void dp3m_calc_influence_function_force(); 00149 00150 /** Calculates the influence function optimized for the dipolar energy and torques. */ 00151 static void dp3m_calc_influence_function_energy(); 00152 00153 /** Calculates the constants necessary to correct the dipolar energy to minimize the error. */ 00154 static void dp3m_compute_constants_energy_dipolar(); 00155 00156 /** Calculates the aliasing sums for the optimal influence function. 00157 * 00158 * Calculates the aliasing sums in the nominator and denominator of 00159 * the expression for the optimal influence function (see 00160 * Hockney/Eastwood: 8-22, p. 275). 00161 * 00162 * \param n n-vector for which the aliasing sum is to be performed. 00163 * \param nominator aliasing sums in the nominator. 00164 * \return denominator aliasing sum in the denominator 00165 */ 00166 static double dp3m_perform_aliasing_sums_force(int n[3], double nominator[1]); 00167 static double dp3m_perform_aliasing_sums_energy(int n[3], double nominator[1]); 00168 00169 static double dp3m_k_space_error(double box_size, double prefac, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L); 00170 /*@}*/ 00171 00172 00173 /* Compute the dipolar surface terms */ 00174 static double calc_surface_term(int force_flag, int energy_flag); 00175 00176 00177 /** \name P3M Tuning Functions (private)*/ 00178 /************************************************************/ 00179 /*@{*/ 00180 00181 00182 // These 3 functions are to tune the P3M code in the case of dipolar interactions 00183 00184 double P3M_DIPOLAR_real_space_error(double box_size, double prefac, double r_cut_iL, 00185 int n_c_part, double sum_q2, double alpha_L); 00186 static void dp3m_tune_aliasing_sums(int nx, int ny, int nz, 00187 int mesh, double mesh_i, int cao, double alpha_L_i, 00188 double *alias1, double *alias2) ; 00189 00190 // To compute the value of alpha through a bibisection method from the formula 33 of JCP115,6351,(2001). 00191 double dp3m_rtbisection(double box_size, double prefac, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy); 00192 00193 /*@}*/ 00194 00195 /************************************************************/ 00196 /* functions related to the correction of the dipolar p3m-energy */ 00197 00198 static double dp3m_average_dipolar_self_energy(double box_l, int mesh); 00199 static double dp3m_perform_aliasing_sums_dipolar_self_energy(int n[3]); 00200 00201 /************************************************************/ 00202 /* functions related to the correction of the dipolar p3m-energy */ 00203 00204 /* 00205 00206 // Do the sum over k<>0 where k={kx,ky,kz} with kx,ky,kz INTEGERS, of 00207 // exp(-PI**2*k**2/alpha**2/L**2) 00208 static double dp3m_sumi1(double alpha_L){ 00209 int k2,kx,ky,kz,kx2,ky2,limit=60; 00210 double suma,alpha_L2; 00211 00212 alpha_L2= alpha_L* alpha_L; 00213 00214 //fprintf(stderr,"alpha_L=%le\n",alpha_L); 00215 //fprintf(stderr,"PI=%le\n",PI); 00216 00217 00218 suma=0.0; 00219 for(kx=-limit;kx<=limit;kx++){ 00220 kx2=kx*kx; 00221 for(ky=-limit;ky<=limit;ky++){ 00222 ky2=ky*ky; 00223 for(kz=-limit;kz<=limit;kz++){ 00224 k2=kx2+ky2+kz*kz; 00225 suma+=exp(-PI*PI*k2/(alpha_L*alpha_L)); 00226 }}} 00227 suma-=1; //It's easier to substract the term k=0 later than put an if inside the loops 00228 00229 00230 //fprintf(stderr,"suma=%le\n",suma); 00231 00232 00233 return suma; 00234 } 00235 00236 */ 00237 00238 /************************************************************/ 00239 00240 /* 00241 // Do the sum over n<>0 where n={nx*L,ny*L,nz*L} with nx,ny,nz INTEGERS, of 00242 // exp(-alpha_iL**2*n**2) 00243 static double dp3m_sumi2(double alpha_L){ 00244 int n2,nx,ny,nz,nx2,ny2,limit=60; 00245 double suma; 00246 00247 00248 00249 suma=0.0; 00250 for(nx=-limit;nx<=limit;nx++){ 00251 nx2=nx*nx; 00252 for(ny=-limit;ny<=limit;ny++){ 00253 ny2=ny*ny; 00254 for(nz=-limit;nz<=limit;nz++){ 00255 n2=nx2+ny2+nz*nz; 00256 suma+=exp(-alpha_L*alpha_L*n2); 00257 }}} 00258 suma-=1; //It's easier to substract the term n=0 later than put an if inside the loops 00259 00260 00261 00262 return suma; 00263 } 00264 00265 */ 00266 00267 void dp3m_pre_init(void) { 00268 p3m_common_parameter_pre_init(&dp3m.params); 00269 dp3m.params.epsilon = P3M_EPSILON_MAGNETIC; 00270 00271 /* dp3m.local_mesh is uninitialized */ 00272 /* dp3m.sm is uninitialized */ 00273 dp3m.rs_mesh = NULL; 00274 dp3m.rs_mesh_dip[0] = NULL; 00275 dp3m.rs_mesh_dip[1] = NULL; 00276 dp3m.rs_mesh_dip[2] = NULL; 00277 dp3m.ks_mesh = NULL; 00278 00279 dp3m.sum_dip_part = 0; 00280 dp3m.sum_mu2 = 0.0; 00281 00282 for (int i = 0; i < 7; i++) 00283 dp3m.int_caf[i] = NULL; 00284 dp3m.pos_shift = 0.0; 00285 dp3m.meshift = NULL; 00286 00287 dp3m.d_op = NULL; 00288 dp3m.g_force = NULL; 00289 dp3m.g_energy = NULL; 00290 00291 dp3m.ca_num = 0; 00292 dp3m.ca_frac = NULL; 00293 dp3m.ca_fmp = NULL; 00294 dp3m.ks_pnum = 0; 00295 00296 dp3m.send_grid = NULL; 00297 dp3m.recv_grid = NULL; 00298 00299 dp3m.energy_correction = 0.0; 00300 00301 dfft_pre_init(); 00302 } 00303 00304 void dp3m_set_bjerrum() { 00305 dp3m.params.alpha = 0.0; 00306 dp3m.params.alpha_L = 0.0; 00307 dp3m.params.r_cut = 0.0; 00308 dp3m.params.r_cut_iL = 0.0; 00309 dp3m.params.mesh[0] = 0; 00310 dp3m.params.mesh[1] = 0; 00311 dp3m.params.mesh[2] = 0; 00312 dp3m.params.cao = 0; 00313 } 00314 00315 00316 void dp3m_init() { 00317 int n; 00318 00319 if (coulomb.Dbjerrum == 0.0) { 00320 if(coulomb.Dbjerrum == 0.0) { 00321 dp3m.params.r_cut = 0.0; 00322 dp3m.params.r_cut_iL = 0.0; 00323 if(this_node==0) 00324 P3M_TRACE(fprintf(stderr,"0: dp3m_init: dipolar Bjerrum length is zero.\n"); 00325 fprintf(stderr," Magnetostatics of dipoles switched off!\n")); 00326 } 00327 } else { 00328 P3M_TRACE(fprintf(stderr,"%d: dp3m_init: \n",this_node)); 00329 00330 if (dp3m_sanity_checks()) return; 00331 00332 P3M_TRACE(fprintf(stderr,"%d: dp3m_init: starting\n",this_node)); 00333 00334 P3M_TRACE(fprintf(stderr,"%d: mesh=%d, cao=%d, mesh_off=(%f,%f,%f)\n",this_node,dp3m.params.mesh[0],dp3m.params.cao,dp3m.params.mesh_off[0],dp3m.params.mesh_off[1],dp3m.params.mesh_off[2])); 00335 dp3m.params.cao3 = dp3m.params.cao*dp3m.params.cao*dp3m.params.cao; 00336 00337 00338 /* initializes the (inverse) mesh constant dp3m.params.a (dp3m.params.ai) and the cutoff for charge assignment dp3m.params.cao_cut */ 00339 dp3m_init_a_ai_cao_cut(); 00340 00341 /* initialize ca fields to size CA_INCREMENT: dp3m.ca_frac and dp3m.ca_fmp */ 00342 dp3m.ca_num = 0; 00343 if(dp3m.ca_num < CA_INCREMENT) { 00344 dp3m.ca_num = 0; 00345 dp3m_realloc_ca_fields(CA_INCREMENT); 00346 } 00347 00348 dp3m_calc_local_ca_mesh(); 00349 00350 dp3m_calc_send_mesh(); 00351 P3M_TRACE(p3m_p3m_print_local_mesh(dp3m.local_mesh)); 00352 00353 /* DEBUG */ 00354 for(n=0;n<n_nodes;n++) { 00355 /* MPI_Barrier(comm_cart); */ 00356 if(n==this_node) P3M_TRACE(p3m_p3m_print_send_mesh(dp3m.sm)); 00357 } 00358 00359 dp3m.send_grid = (double *) realloc(dp3m.send_grid, sizeof(double)*dp3m.sm.max); 00360 dp3m.recv_grid = (double *) realloc(dp3m.recv_grid, sizeof(double)*dp3m.sm.max); 00361 00362 /* fix box length dependent constants */ 00363 dp3m_scaleby_box_l(); 00364 00365 if (dp3m.params.inter > 0) dp3m_interpolate_dipole_assignment_function(); 00366 00367 dp3m.pos_shift = (double)((dp3m.params.cao-1)/2) - (dp3m.params.cao%2)/2.0; 00368 P3M_TRACE(fprintf(stderr,"%d: dipolar pos_shift = %f\n",this_node,dp3m.pos_shift)); 00369 00370 /* FFT */ 00371 P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh ADR=%p\n",this_node,dp3m.rs_mesh)); 00372 00373 int ca_mesh_size = dfft_init(&dp3m.rs_mesh, 00374 dp3m.local_mesh.dim,dp3m.local_mesh.margin, 00375 dp3m.params.mesh, dp3m.params.mesh_off, 00376 &dp3m.ks_pnum); 00377 dp3m.ks_mesh = (double *) realloc(dp3m.ks_mesh, ca_mesh_size*sizeof(double)); 00378 00379 for (n=0;n<3;n++) 00380 dp3m.rs_mesh_dip[n] = (double *) realloc(dp3m.rs_mesh_dip[n], ca_mesh_size*sizeof(double)); 00381 00382 P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh_dip[0] ADR=%p\n",this_node,dp3m.rs_mesh_dip[0])); 00383 P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh_dip[1] ADR=%p\n",this_node,dp3m.rs_mesh_dip[1])); 00384 P3M_TRACE(fprintf(stderr,"%d: dp3m.rs_mesh_dip[2] ADR=%p\n",this_node,dp3m.rs_mesh_dip[2])); 00385 00386 00387 /* k-space part: */ 00388 00389 dp3m_calc_differential_operator(); 00390 00391 dp3m_calc_influence_function_force(); 00392 dp3m_calc_influence_function_energy(); 00393 00394 dp3m_count_magnetic_particles(); 00395 00396 P3M_TRACE(fprintf(stderr,"%d: p3m initialized\n",this_node)); 00397 } 00398 } 00399 00400 void dp3m_free_dipoles() { 00401 for (int i=0;i<3;i++) free(dp3m.rs_mesh_dip[i]); 00402 free(dp3m.ca_frac); 00403 free(dp3m.ca_fmp); 00404 free(dp3m.send_grid); 00405 free(dp3m.recv_grid); 00406 free(dp3m.rs_mesh); 00407 free(dp3m.ks_mesh); 00408 } 00409 00410 double dp3m_average_dipolar_self_energy(double box_l, int mesh) { 00411 int i,ind,n[3]; 00412 double node_phi = 0.0, phi = 0.0; 00413 double U2; 00414 00415 int end[3]; 00416 int size=1; 00417 00418 for(i=0;i<3;i++) { 00419 size *= dfft.plan[3].new_mesh[i]; 00420 end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i]; 00421 } 00422 00423 for(n[0]=dfft.plan[3].start[0]; n[0]<end[0]; n[0]++){ 00424 for(n[1]=dfft.plan[3].start[1]; n[1]<end[1]; n[1]++){ 00425 for(n[2]=dfft.plan[3].start[2]; n[2]<end[2]; n[2]++) { 00426 ind = (n[2]-dfft.plan[3].start[2]) + dfft.plan[3].new_mesh[2] * 00427 ((n[1]-dfft.plan[3].start[1]) + (dfft.plan[3].new_mesh[1]*(n[0]-dfft.plan[3].start[0]))); 00428 00429 if( (n[0]==0) && (n[1]==0) && (n[2]==0) ) 00430 node_phi += 0.0; 00431 else if( (n[0]%(dp3m.params.mesh[0]/2)==0) && 00432 (n[1]%(dp3m.params.mesh[0]/2)==0) && 00433 (n[2]%(dp3m.params.mesh[0]/2)==0) ) 00434 node_phi += 0.0; 00435 else { 00436 U2 = dp3m_perform_aliasing_sums_dipolar_self_energy(n); 00437 node_phi += dp3m.g_energy[ind] * U2*(SQR(dp3m.d_op[n[0]])+SQR(dp3m.d_op[n[1]])+SQR(dp3m.d_op[n[2]])); 00438 } 00439 }}} 00440 00441 00442 MPI_Reduce(&node_phi, &phi, 1, MPI_DOUBLE, MPI_SUM, 0, comm_cart); 00443 00444 phi*=PI/3./box_l/pow(mesh,3); 00445 00446 00447 return phi ; 00448 } 00449 00450 00451 00452 double dp3m_perform_aliasing_sums_dipolar_self_energy(int n[3]) 00453 { 00454 double u_sum = 0.0; 00455 /* lots of temporary variables... */ 00456 double f1,sx,sy,sz,mx,my,mz,nmx,nmy,nmz; 00457 int limit=P3M_BRILLOUIN+5; 00458 00459 f1 = 1.0/(double)dp3m.params.mesh[0]; 00460 00461 for(mx = -limit; mx <=limit; mx++) { 00462 nmx = dp3m.meshift[n[0]] + dp3m.params.mesh[0]*mx; 00463 sx = pow(sinc(f1*nmx),2.0*dp3m.params.cao); 00464 for(my = -limit; my <= limit; my++) { 00465 nmy = dp3m.meshift[n[1]] + dp3m.params.mesh[0]*my; 00466 sy = sx*pow(sinc(f1*nmy),2.0*dp3m.params.cao); 00467 for(mz = -limit; mz <=limit; mz++) { 00468 nmz = dp3m.meshift[n[2]] + dp3m.params.mesh[0]*mz; 00469 sz = sy*pow(sinc(f1*nmz),2.0*dp3m.params.cao); 00470 u_sum += sz; 00471 } 00472 } 00473 } 00474 return u_sum; 00475 } 00476 00477 00478 00479 00480 /****************** functions related to the parsing&tuning of the dipolar parameters **********/ 00481 00482 00483 void dp3m_set_tune_params(double r_cut, int mesh, int cao, 00484 double alpha, double accuracy, int n_interpol) 00485 { 00486 if (r_cut >= 0) { 00487 dp3m.params.r_cut = r_cut; 00488 dp3m.params.r_cut_iL = r_cut*box_l_i[0]; 00489 } 00490 00491 if (mesh >= 0) 00492 dp3m.params.mesh[2] = dp3m.params.mesh[1] = dp3m.params.mesh[0] = mesh; 00493 00494 if (cao >= 0) 00495 dp3m.params.cao = cao; 00496 00497 if (alpha >= 0) { 00498 dp3m.params.alpha = alpha; 00499 dp3m.params.alpha_L = alpha*box_l[0]; 00500 } 00501 00502 if (accuracy >= 0) 00503 dp3m.params.accuracy = accuracy; 00504 00505 if (n_interpol != -1) 00506 dp3m.params.inter = n_interpol; 00507 00508 coulomb.Dprefactor = (temperature > 0) ? temperature*coulomb.Dbjerrum : coulomb.Dbjerrum; 00509 00510 } 00511 00512 00513 /*****************************************************************************/ 00514 00515 int dp3m_set_params(double r_cut, int mesh, int cao, 00516 double alpha, double accuracy) 00517 { 00518 if (coulomb.Dmethod != DIPOLAR_P3M && coulomb.Dmethod != DIPOLAR_MDLC_P3M) 00519 coulomb.Dmethod = DIPOLAR_P3M; 00520 00521 if(r_cut < 0) 00522 return -1; 00523 00524 if(mesh < 0) 00525 return -2; 00526 00527 if(cao < 1 || cao > 7 || cao > mesh) 00528 return -3; 00529 00530 dp3m.params.r_cut = r_cut; 00531 dp3m.params.r_cut_iL = r_cut*box_l_i[0]; 00532 dp3m.params.mesh[2] = dp3m.params.mesh[1] = dp3m.params.mesh[0] = mesh; 00533 dp3m.params.cao = cao; 00534 00535 if (alpha > 0) { 00536 dp3m.params.alpha = alpha; 00537 dp3m.params.alpha_L = alpha*box_l[0]; 00538 } 00539 else 00540 if (alpha != -1.0) 00541 return -4; 00542 00543 if (accuracy >= 0) 00544 dp3m.params.accuracy = accuracy; 00545 else 00546 if (accuracy != -1.0) 00547 return -5; 00548 00549 mpi_bcast_coulomb_params(); 00550 00551 return 0; 00552 } 00553 00554 00555 int dp3m_set_mesh_offset(double x, double y, double z) 00556 { 00557 if(x < 0.0 || x > 1.0 || 00558 y < 0.0 || y > 1.0 || 00559 z < 0.0 || z > 1.0 ) 00560 return ES_ERROR; 00561 00562 dp3m.params.mesh_off[0] = x; 00563 dp3m.params.mesh_off[1] = y; 00564 dp3m.params.mesh_off[2] = z; 00565 00566 mpi_bcast_coulomb_params(); 00567 00568 return ES_OK; 00569 } 00570 00571 /* We left the handling of the epsilon, due to portability reasons in 00572 the future for the electrical dipoles, or if people wants to do 00573 electrical dipoles alone using the magnetic code .. */ 00574 00575 int dp3m_set_eps(double eps) 00576 { 00577 dp3m.params.epsilon = eps; 00578 00579 fprintf(stderr,">> dp3m.params.epsilon =%lf\n",dp3m.params.epsilon); 00580 fprintf(stderr,"if you are doing true MAGNETIC CALCULATIONS the value of Depsilon should be 1, if you change it, you go on your own risk ...\n"); 00581 00582 mpi_bcast_coulomb_params(); 00583 00584 return ES_OK; 00585 } 00586 00587 00588 int dp3m_set_ninterpol(int n) 00589 { 00590 if (n < 0) 00591 return ES_ERROR; 00592 00593 dp3m.params.inter = n; 00594 00595 mpi_bcast_coulomb_params(); 00596 00597 return ES_OK; 00598 } 00599 00600 /*****************************************************************************/ 00601 00602 00603 void dp3m_interpolate_dipole_assignment_function() 00604 { 00605 double dInterpol = 0.5 / (double)dp3m.params.inter; 00606 int i; 00607 long j; 00608 00609 dInterpol = 0.5 / (double)dp3m.params.inter; 00610 if (dp3m.params.inter == 0) return; 00611 00612 P3M_TRACE(fprintf(stderr,"dipolar %d - interpolating (%d) the order-%d charge assignment function\n", 00613 this_node,dp3m.params.inter,dp3m.params.cao)); 00614 00615 dp3m.params.inter2 = 2*dp3m.params.inter + 1; 00616 00617 for (i=0; i < dp3m.params.cao; i++) { 00618 /* allocate memory for interpolation array */ 00619 dp3m.int_caf[i] = (double *) realloc(dp3m.int_caf[i], sizeof(double)*(2*dp3m.params.inter+1)); 00620 00621 /* loop over all interpolation points */ 00622 for (j=-dp3m.params.inter; j<=dp3m.params.inter; j++) 00623 dp3m.int_caf[i][j+dp3m.params.inter] = p3m_caf(i, j*dInterpol,dp3m.params.cao); 00624 } 00625 } 00626 00627 /* assign the dipoles */ 00628 void dp3m_dipole_assign(void) 00629 { 00630 Cell *cell; 00631 Particle *p; 00632 int i,c,np,j; 00633 /* magnetic particle counter, dipole fraction counter */ 00634 int cp_cnt=0; 00635 00636 00637 /* prepare local FFT mesh */ 00638 for(i=0;i<3;i++) 00639 for(j=0; j<dp3m.local_mesh.size; j++) dp3m.rs_mesh_dip[i][j] = 0.0; 00640 00641 for (c = 0; c < local_cells.n; c++) { 00642 cell = local_cells.cell[c]; 00643 p = cell->part; 00644 np = cell->n; 00645 for(i = 0; i < np; i++) { 00646 if( p[i].p.dipm != 0.0) { 00647 dp3m_assign_dipole( p[i].r.p,p[i].p.dipm, p[i].r.dip,cp_cnt); 00648 cp_cnt++; 00649 } 00650 } 00651 } 00652 dp3m_shrink_wrap_dipole_grid(cp_cnt); 00653 00654 } 00655 00656 00657 void dp3m_assign_dipole(double real_pos[3],double mu, double dip[3],int cp_cnt) 00658 { 00659 /* we do not really want to export these, but this function should be inlined */ 00660 double p3m_caf(int i, double x, int cao_value); 00661 void dp3m_realloc_ca_fields(int size); 00662 00663 int d, i0, i1, i2; 00664 double tmp0, tmp1; 00665 /* position of a particle in local mesh units */ 00666 double pos; 00667 /* 1d-index of nearest mesh point */ 00668 int nmp; 00669 /* distance to nearest mesh point */ 00670 double dist[3]; 00671 /* index for caf interpolation grid */ 00672 int arg[3]; 00673 /* index, index jumps for dp3m.rs_mesh array */ 00674 int q_ind = 0; 00675 double cur_ca_frac_val, *cur_ca_frac; 00676 00677 // make sure we have enough space 00678 if (cp_cnt >= dp3m.ca_num) dp3m_realloc_ca_fields(cp_cnt + 1); 00679 // do it here, since p3m_realloc_ca_fields may change the address of dp3m.ca_frac 00680 cur_ca_frac = dp3m.ca_frac + dp3m.params.cao3*cp_cnt; 00681 00682 if (dp3m.params.inter == 0) { 00683 for(d=0;d<3;d++) { 00684 /* particle position in mesh coordinates */ 00685 pos = ((real_pos[d]-dp3m.local_mesh.ld_pos[d])*dp3m.params.ai[d]) - dp3m.pos_shift; 00686 /* nearest mesh point */ 00687 nmp = (int)pos; 00688 /* distance to nearest mesh point */ 00689 dist[d] = (pos-nmp)-0.5; 00690 /* 3d-array index of nearest mesh point */ 00691 q_ind = (d == 0) ? nmp : nmp + dp3m.local_mesh.dim[d]*q_ind; 00692 00693 #ifdef ADDITIONAL_CHECKS 00694 if( pos < -skin*dp3m.params.ai[d] ) { 00695 fprintf(stderr,"%d: dipolar dp3m.rs_mesh underflow! (pos %f)\n", this_node, real_pos[d]); 00696 fprintf(stderr,"%d: allowed coordinates: %f - %f\n", 00697 this_node,my_left[d] - skin, my_right[d] + skin); 00698 } 00699 if( (nmp + dp3m.params.cao) > dp3m.local_mesh.dim[d] ) { 00700 fprintf(stderr,"%d: dipolar dp3m.rs_mesh overflow! (pos %f, nmp=%d)\n", this_node, real_pos[d],nmp); 00701 fprintf(stderr,"%d: allowed coordinates: %f - %f\n", 00702 this_node, my_left[d] - skin, my_right[d] + skin); 00703 } 00704 #endif 00705 } 00706 if (cp_cnt >= 0) dp3m.ca_fmp[cp_cnt] = q_ind; 00707 00708 for(i0=0; i0<dp3m.params.cao; i0++) { 00709 tmp0 = p3m_caf(i0, dist[0], dp3m.params.cao); 00710 for(i1=0; i1<dp3m.params.cao; i1++) { 00711 tmp1 = tmp0 * p3m_caf(i1, dist[1],dp3m.params.cao); 00712 for(i2=0; i2<dp3m.params.cao; i2++) { 00713 cur_ca_frac_val = tmp1 * p3m_caf(i2, dist[2],dp3m.params.cao); 00714 if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val; 00715 if (mu != 0.0) { 00716 dp3m.rs_mesh_dip[0][q_ind] += dip[0] * cur_ca_frac_val; 00717 dp3m.rs_mesh_dip[1][q_ind] += dip[1] * cur_ca_frac_val; 00718 dp3m.rs_mesh_dip[2][q_ind] += dip[2] * cur_ca_frac_val; 00719 } 00720 q_ind++; 00721 } 00722 q_ind += dp3m.local_mesh.q_2_off; 00723 } 00724 q_ind += dp3m.local_mesh.q_21_off; 00725 } 00726 } 00727 else { 00728 /* particle position in mesh coordinates */ 00729 for(d=0;d<3;d++) { 00730 pos = ((real_pos[d]-dp3m.local_mesh.ld_pos[d])*dp3m.params.ai[d]) - dp3m.pos_shift; 00731 nmp = (int) pos; 00732 arg[d] = (int) ((pos - nmp)*dp3m.params.inter2); 00733 /* for the first dimension, q_ind is always zero, so this shifts correctly */ 00734 q_ind = nmp + dp3m.local_mesh.dim[d]*q_ind; 00735 00736 #ifdef ADDITIONAL_CHECKS 00737 if( pos < -skin*dp3m.params.ai[d] ) { 00738 fprintf(stderr,"%d: dipolar dp3m.rs_mesh underflow! (pos %f)\n", this_node, real_pos[d]); 00739 fprintf(stderr,"%d: allowed coordinates: %f - %f\n", 00740 this_node,my_left[d] - skin, my_right[d] + skin); 00741 } 00742 if( (nmp + dp3m.params.cao) > dp3m.local_mesh.dim[d] ) { 00743 fprintf(stderr,"%d: dipolar dp3m.rs_mesh overflow! (pos %f, nmp=%d)\n", this_node, real_pos[d],nmp); 00744 fprintf(stderr,"%d: allowed coordinates: %f - %f\n", 00745 this_node, my_left[d] - skin, my_right[d] + skin); 00746 } 00747 #endif 00748 } 00749 if (cp_cnt >= 0) dp3m.ca_fmp[cp_cnt] = q_ind; 00750 00751 for(i0=0; i0<dp3m.params.cao; i0++) { 00752 tmp0 = dp3m.int_caf[i0][arg[0]]; 00753 for(i1=0; i1<dp3m.params.cao; i1++) { 00754 tmp1 = tmp0 * dp3m.int_caf[i1][arg[1]]; 00755 for(i2=0; i2<dp3m.params.cao; i2++) { 00756 cur_ca_frac_val = tmp1 * dp3m.int_caf[i2][arg[2]]; 00757 if (cp_cnt >= 0) *(cur_ca_frac++) = cur_ca_frac_val; 00758 if (mu != 0.0) { 00759 dp3m.rs_mesh_dip[0][q_ind] += dip[0] * cur_ca_frac_val; 00760 dp3m.rs_mesh_dip[1][q_ind] += dip[1] * cur_ca_frac_val; 00761 dp3m.rs_mesh_dip[2][q_ind] += dip[2] * cur_ca_frac_val; 00762 } 00763 q_ind++; 00764 } 00765 q_ind += dp3m.local_mesh.q_2_off; 00766 } 00767 q_ind += dp3m.local_mesh.q_21_off; 00768 } 00769 } 00770 } 00771 00772 00773 /** shrink wrap the dipoles grid */ 00774 void dp3m_shrink_wrap_dipole_grid(int n_dipoles) { 00775 if( n_dipoles < dp3m.ca_num ) dp3m_realloc_ca_fields(n_dipoles); 00776 } 00777 00778 00779 #ifdef ROTATION 00780 /* assign the torques obtained from k-space */ 00781 static void P3M_assign_torques(double prefac, int d_rs) 00782 { 00783 Cell *cell; 00784 Particle *p; 00785 int i,c,np,i0,i1,i2; 00786 /* particle counter, charge fraction counter */ 00787 int cp_cnt=0, cf_cnt=0; 00788 /* index, index jumps for dp3m.rs_mesh array */ 00789 int q_ind; 00790 int q_m_off = (dp3m.local_mesh.dim[2] - dp3m.params.cao); 00791 int q_s_off = dp3m.local_mesh.dim[2] * (dp3m.local_mesh.dim[1] - dp3m.params.cao); 00792 #ifdef ONEPART_DEBUG 00793 double db_fsum=0 ; /* TODO: db_fsum was missing and code couldn't compile. Now the arbitrary value of 0 is assigned to it, please check.*/ 00794 #endif 00795 00796 cp_cnt=0; cf_cnt=0; 00797 for (c = 0; c < local_cells.n; c++) { 00798 cell = local_cells.cell[c]; 00799 p = cell->part; 00800 np = cell->n; 00801 for(i=0; i<np; i++) { 00802 if( (p[i].p.dipm) != 0.0 ) { 00803 q_ind = dp3m.ca_fmp[cp_cnt]; 00804 for(i0=0; i0<dp3m.params.cao; i0++) { 00805 for(i1=0; i1<dp3m.params.cao; i1++) { 00806 for(i2=0; i2<dp3m.params.cao; i2++) { 00807 /* 00808 The following line would fill the torque with the k-space electric field 00809 (without the self-field term) [notice the minus sign!]: 00810 p[i].f.torque[d_rs] -= prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind];; 00811 Since the torque is the dipole moment cross-product with E, we have: 00812 */ 00813 switch (d_rs) { 00814 case 0: //E_x 00815 p[i].f.torque[1] -= p[i].r.dip[2]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 00816 p[i].f.torque[2] += p[i].r.dip[1]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 00817 break; 00818 case 1: //E_y 00819 p[i].f.torque[0] += p[i].r.dip[2]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 00820 p[i].f.torque[2] -= p[i].r.dip[0]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 00821 break; 00822 case 2: //E_z 00823 p[i].f.torque[0] -= p[i].r.dip[1]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 00824 p[i].f.torque[1] += p[i].r.dip[0]*prefac*dp3m.ca_frac[cf_cnt]*dp3m.rs_mesh[q_ind]; 00825 } 00826 q_ind++; 00827 cf_cnt++; 00828 } 00829 q_ind += q_m_off; 00830 } 00831 q_ind += q_s_off; 00832 } 00833 cp_cnt++; 00834 00835 ONEPART_TRACE(if(p[i].p.identity==check_id) fprintf(stderr,"%d: OPT: P3M f = (%.3e,%.3e,%.3e) in dir %d add %.5f\n",this_node,p[i].f.f[0],p[i].f.f[1],p[i].f.f[2],d_rs,-db_fsum)); 00836 } 00837 } 00838 } 00839 } 00840 #endif 00841 00842 00843 00844 /* assign the dipolar forces obtained from k-space */ 00845 static void dp3m_assign_forces_dip(double prefac, int d_rs) 00846 { 00847 Cell *cell; 00848 Particle *p; 00849 #ifdef ONEPART_DEBUG 00850 double db_fsum=0 ; /* TODO: db_fsum was missing and code couldn't compile. Now the arbitrary value of 0 is assigned to it, please check.*/ 00851 #endif 00852 int i,c,np,i0,i1,i2; 00853 /* particle counter, charge fraction counter */ 00854 int cp_cnt=0, cf_cnt=0; 00855 /* index, index jumps for dp3m.rs_mesh array */ 00856 int q_ind; 00857 int q_m_off = (dp3m.local_mesh.dim[2] - dp3m.params.cao); 00858 int q_s_off = dp3m.local_mesh.dim[2] * (dp3m.local_mesh.dim[1] - dp3m.params.cao); 00859 00860 cp_cnt=0; cf_cnt=0; 00861 for (c = 0; c < local_cells.n; c++) { 00862 cell = local_cells.cell[c]; 00863 p = cell->part; 00864 np = cell->n; 00865 for(i=0; i<np; i++) { 00866 if( (p[i].p.dipm) != 0.0 ) { 00867 q_ind = dp3m.ca_fmp[cp_cnt]; 00868 for(i0=0; i0<dp3m.params.cao; i0++) { 00869 for(i1=0; i1<dp3m.params.cao; i1++) { 00870 for(i2=0; i2<dp3m.params.cao; i2++) { 00871 p[i].f.f[d_rs] += prefac*dp3m.ca_frac[cf_cnt]* 00872 ( dp3m.rs_mesh_dip[0][q_ind]*p[i].r.dip[0] 00873 +dp3m.rs_mesh_dip[1][q_ind]*p[i].r.dip[1] 00874 +dp3m.rs_mesh_dip[2][q_ind]*p[i].r.dip[2]); 00875 q_ind++; 00876 cf_cnt++; 00877 } 00878 q_ind += q_m_off; 00879 } 00880 q_ind += q_s_off; 00881 } 00882 cp_cnt++; 00883 00884 ONEPART_TRACE(if(p[i].p.identity==check_id) fprintf(stderr,"%d: OPT: P3M f = (%.3e,%.3e,%.3e) in dir %d add %.5f\n",this_node,p[i].f.f[0],p[i].f.f[1],p[i].f.f[2],d_rs,-db_fsum)); 00885 } 00886 } 00887 } 00888 } 00889 00890 /*****************************************************************************/ 00891 00892 00893 double dp3m_calc_kspace_forces(int force_flag, int energy_flag) 00894 { 00895 int i,d,d_rs,ind,j[3]; 00896 /**************************************************************/ 00897 /* k space energy */ 00898 double dipole_prefac; 00899 double surface_term=0.0; 00900 double k_space_energy_dip=0.0, node_k_space_energy_dip=0.0; 00901 double tmp0,tmp1; 00902 00903 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m_perform(%d,%d): \n",this_node, force_flag, energy_flag)); 00904 00905 dipole_prefac = coulomb.Dprefactor / (double)(dp3m.params.mesh[0]*dp3m.params.mesh[1]*dp3m.params.mesh[2]); 00906 00907 if (dp3m.sum_mu2 > 0) { 00908 /* Gather information for FFT grid inside the nodes domain (inner local mesh) */ 00909 /* and Perform forward 3D FFT (Charge Assignment Mesh). */ 00910 dp3m_gather_fft_grid(dp3m.rs_mesh_dip[0]); 00911 dp3m_gather_fft_grid(dp3m.rs_mesh_dip[1]); 00912 dp3m_gather_fft_grid(dp3m.rs_mesh_dip[2]); 00913 dfft_perform_forw(dp3m.rs_mesh_dip[0]); 00914 dfft_perform_forw(dp3m.rs_mesh_dip[1]); 00915 dfft_perform_forw(dp3m.rs_mesh_dip[2]); 00916 //Note: after these calls, the grids are in the order yzx and not xyz anymore!!! 00917 } 00918 00919 /* === K Space Calculations === */ 00920 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m_perform: k-Space\n",this_node)); 00921 00922 /* === K Space Energy Calculation === */ 00923 if(energy_flag) { 00924 /********************* 00925 Dipolar energy 00926 **********************/ 00927 if (dp3m.sum_mu2 > 0) { 00928 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m start Energy calculation: k-Space\n",this_node)); 00929 00930 /* i*k differentiation for dipolar gradients: |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */ 00931 ind=0; 00932 i=0; 00933 for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) { 00934 for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) { 00935 for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) { 00936 node_k_space_energy_dip += dp3m.g_energy[i] * ( 00937 SQR(dp3m.rs_mesh_dip[0][ind]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+ 00938 dp3m.rs_mesh_dip[1][ind]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+ 00939 dp3m.rs_mesh_dip[2][ind]*dp3m.d_op[j[1]+dfft.plan[3].start[1]] 00940 ) + 00941 SQR(dp3m.rs_mesh_dip[0][ind+1]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+ 00942 dp3m.rs_mesh_dip[1][ind+1]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+ 00943 dp3m.rs_mesh_dip[2][ind+1]*dp3m.d_op[j[1]+dfft.plan[3].start[1]] 00944 )); 00945 ind += 2; 00946 i++; 00947 } 00948 } 00949 } 00950 node_k_space_energy_dip *= dipole_prefac * PI / box_l[0]; 00951 MPI_Reduce(&node_k_space_energy_dip, &k_space_energy_dip, 1, MPI_DOUBLE, MPI_SUM, 0, comm_cart); 00952 00953 dp3m_compute_constants_energy_dipolar(); 00954 00955 P3M_TRACE(fprintf(stderr,"%d: dp3m.params.epsilon=%lf\n", this_node, dp3m.params.epsilon)); 00956 00957 if(this_node==0) { 00958 /* self energy correction */ 00959 P3M_TRACE(fprintf(stderr,"%d: *dp3m.energy_correction=%20.15lf\n",\ 00960 this_node, dp3m.energy_correction)); 00961 #ifdef P3M_DEBUG 00962 double a = k_space_energy_dip; 00963 #endif 00964 k_space_energy_dip -= coulomb.Dprefactor*(dp3m.sum_mu2*2*pow(dp3m.params.alpha_L*box_l_i[0],3) * wupii/3.0); 00965 00966 double volume=box_l[0]*box_l[1]*box_l[2]; 00967 k_space_energy_dip += coulomb.Dprefactor*dp3m.energy_correction/volume; /* add the dipolar energy correction due to systematic Madelung-Self effects */ 00968 00969 P3M_TRACE(fprintf(stderr, "%d: Energy correction: %lf\n", this_node, k_space_energy_dip - a)); 00970 } 00971 00972 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m end Energy calculation: k-Space\n",this_node)); 00973 00974 } 00975 } //if (energy_flag) 00976 00977 /* === K Space Force Calculation === */ 00978 if(force_flag) { 00979 /*************************** 00980 DIPOLAR TORQUES (k-space) 00981 ****************************/ 00982 if (dp3m.sum_mu2 > 0) { 00983 #ifdef ROTATION 00984 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m start torques calculation: k-Space\n",this_node)); 00985 00986 /* fill in ks_mesh array for torque calculation */ 00987 ind=0; 00988 i=0; 00989 00990 for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) { //j[0]=n_y 00991 for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) { //j[1]=n_z 00992 for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) { //j[2]=n_x 00993 //tmp0 = Re(mu)*k, tmp1 = Im(mu)*k 00994 00995 tmp0 = dp3m.rs_mesh_dip[0][ind]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+ 00996 dp3m.rs_mesh_dip[1][ind]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+ 00997 dp3m.rs_mesh_dip[2][ind]*dp3m.d_op[j[1]+dfft.plan[3].start[1]]; 00998 00999 tmp1 = dp3m.rs_mesh_dip[0][ind+1]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+ 01000 dp3m.rs_mesh_dip[1][ind+1]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+ 01001 dp3m.rs_mesh_dip[2][ind+1]*dp3m.d_op[j[1]+dfft.plan[3].start[1]]; 01002 01003 /* the optimal influence function is the same for torques 01004 and energy */ 01005 01006 dp3m.ks_mesh[ind] = tmp0*dp3m.g_energy[i]; 01007 dp3m.ks_mesh[ind+1] = tmp1*dp3m.g_energy[i]; 01008 ind += 2; 01009 i++; 01010 } 01011 } 01012 } 01013 01014 01015 /* Force component loop */ 01016 for(d=0;d<3;d++) { 01017 d_rs = (d+dp3m.ks_pnum)%3; 01018 ind=0; 01019 for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) { 01020 for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) { 01021 for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) { 01022 dp3m.rs_mesh[ind] = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind]; ind++; 01023 dp3m.rs_mesh[ind] = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind]; ind++; 01024 } 01025 } 01026 } 01027 01028 01029 /* Back FFT force component mesh */ 01030 dfft_perform_back(dp3m.rs_mesh); 01031 /* redistribute force component mesh */ 01032 dp3m_spread_force_grid(dp3m.rs_mesh); 01033 /* Assign force component from mesh to particle */ 01034 P3M_assign_torques(dipole_prefac*(2*PI/box_l[0]), d_rs); 01035 } 01036 P3M_TRACE(fprintf(stderr, "%d: done torque calculation.\n", this_node)); 01037 #endif /*if def ROTATION */ 01038 01039 /*************************** 01040 DIPOLAR FORCES (k-space) 01041 ****************************/ 01042 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m start forces calculation: k-Space\n",this_node)); 01043 01044 //Compute forces after torques because the algorithm below overwrites the grids dp3m.rs_mesh_dip ! 01045 //Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this number to 6 ! 01046 /* fill in ks_mesh array for force calculation */ 01047 ind=0; 01048 i=0; 01049 for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) { //j[0]=n_y 01050 for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) { //j[1]=n_z 01051 for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) { //j[2]=n_x 01052 //tmp0 = Im(mu)*k, tmp1 = -Re(mu)*k 01053 tmp0 = dp3m.rs_mesh_dip[0][ind+1]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+ 01054 dp3m.rs_mesh_dip[1][ind+1]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+ 01055 dp3m.rs_mesh_dip[2][ind+1]*dp3m.d_op[j[1]+dfft.plan[3].start[1]]; 01056 tmp1 = dp3m.rs_mesh_dip[0][ind]*dp3m.d_op[j[2]+dfft.plan[3].start[2]]+ 01057 dp3m.rs_mesh_dip[1][ind]*dp3m.d_op[j[0]+dfft.plan[3].start[0]]+ 01058 dp3m.rs_mesh_dip[2][ind]*dp3m.d_op[j[1]+dfft.plan[3].start[1]]; 01059 dp3m.ks_mesh[ind] = tmp0*dp3m.g_force[i]; 01060 dp3m.ks_mesh[ind+1] = -tmp1*dp3m.g_force[i]; 01061 ind += 2; 01062 i++; 01063 } 01064 } 01065 } 01066 01067 /* Force component loop */ 01068 for(d=0;d<3;d++) { /* direction in k space: */ 01069 d_rs = (d+dp3m.ks_pnum)%3; 01070 ind=0; 01071 for(j[0]=0; j[0]<dfft.plan[3].new_mesh[0]; j[0]++) { //j[0]=n_y 01072 for(j[1]=0; j[1]<dfft.plan[3].new_mesh[1]; j[1]++) { //j[1]=n_z 01073 for(j[2]=0; j[2]<dfft.plan[3].new_mesh[2]; j[2]++) { //j[2]=n_x 01074 tmp0 = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind]; 01075 dp3m.rs_mesh_dip[0][ind] = dp3m.d_op[ j[2]+dfft.plan[3].start[2] ]*tmp0; 01076 dp3m.rs_mesh_dip[1][ind] = dp3m.d_op[ j[0]+dfft.plan[3].start[0] ]*tmp0; 01077 dp3m.rs_mesh_dip[2][ind] = dp3m.d_op[ j[1]+dfft.plan[3].start[1] ]*tmp0; 01078 ind++; 01079 tmp0 = dp3m.d_op[ j[d]+dfft.plan[3].start[d] ]*dp3m.ks_mesh[ind]; 01080 dp3m.rs_mesh_dip[0][ind] = dp3m.d_op[ j[2]+dfft.plan[3].start[2] ]*tmp0; 01081 dp3m.rs_mesh_dip[1][ind] = dp3m.d_op[ j[0]+dfft.plan[3].start[0] ]*tmp0; 01082 dp3m.rs_mesh_dip[2][ind] = dp3m.d_op[ j[1]+dfft.plan[3].start[1] ]*tmp0; 01083 ind++; 01084 } 01085 } 01086 } 01087 /* Back FFT force component mesh */ 01088 dfft_perform_back(dp3m.rs_mesh_dip[0]); 01089 dfft_perform_back(dp3m.rs_mesh_dip[1]); 01090 dfft_perform_back(dp3m.rs_mesh_dip[2]); 01091 /* redistribute force component mesh */ 01092 dp3m_spread_force_grid(dp3m.rs_mesh_dip[0]); 01093 dp3m_spread_force_grid(dp3m.rs_mesh_dip[1]); 01094 dp3m_spread_force_grid(dp3m.rs_mesh_dip[2]); 01095 /* Assign force component from mesh to particle */ 01096 dp3m_assign_forces_dip(dipole_prefac*pow(2*PI/box_l[0],2), d_rs); 01097 } 01098 01099 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m end forces calculation: k-Space\n",this_node)); 01100 01101 01102 } /* of if (dp3m.sum_mu2>0 */ 01103 } /* of if(force_flag) */ 01104 01105 if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) { 01106 surface_term = calc_surface_term(force_flag, energy_flag); 01107 if(this_node == 0) 01108 k_space_energy_dip += surface_term; 01109 } 01110 01111 01112 return k_space_energy_dip; 01113 } 01114 01115 01116 01117 /************************************************************/ 01118 01119 double calc_surface_term(int force_flag, int energy_flag) 01120 { 01121 01122 int np, c, i,ip=0,n_local_part=0; 01123 Particle *part; 01124 double pref =coulomb.Dprefactor*4*M_PI*box_l_i[0]*box_l_i[1]*box_l_i[2]/(2*dp3m.params.epsilon + 1); 01125 double suma,a[3]; 01126 double en; 01127 double *mx=NULL,*my=NULL,*mz=NULL; 01128 01129 for (c = 0; c < local_cells.n; c++) 01130 n_local_part += local_cells.cell[c]->n; 01131 01132 // We put all the dipolar momenta in a the arrays mx,my,mz according to the id-number of the particles 01133 mx = (double *) malloc(sizeof(double)*n_local_part); 01134 my = (double *) malloc(sizeof(double)*n_local_part); 01135 mz = (double *) malloc(sizeof(double)*n_local_part); 01136 01137 01138 01139 for (c = 0; c < local_cells.n; c++) { 01140 np = local_cells.cell[c]->n; 01141 part = local_cells.cell[c]->part; 01142 for (i = 0; i < np; i++){ 01143 mx[ip]=part[i].r.dip[0]; 01144 my[ip]=part[i].r.dip[1]; 01145 mz[ip]=part[i].r.dip[2]; 01146 ip++; 01147 } 01148 } 01149 01150 // we will need the sum of all dipolar momenta vectors 01151 a[0]=0.0; 01152 a[1]=0.0; 01153 a[2]=0.0; 01154 01155 for (i = 0; i < n_local_part; i++){ 01156 a[0]+=mx[i]; 01157 a[1]+=my[i]; 01158 a[2]+=mz[i]; 01159 } 01160 01161 MPI_Allreduce(MPI_IN_PLACE, a, 3, MPI_DOUBLE, MPI_SUM, comm_cart); 01162 01163 if (energy_flag) { 01164 01165 suma=0.0; 01166 for (i = 0; i < n_local_part; i++){ 01167 suma+=mx[i]*a[0]+my[i]*a[1]+mz[i]*a[2]; 01168 } 01169 MPI_Allreduce(MPI_IN_PLACE, &suma, 1, MPI_DOUBLE, MPI_SUM, comm_cart); 01170 en = 0.5*pref*suma; 01171 01172 } else { 01173 en = 0; 01174 } 01175 #ifdef ROTATION 01176 if (force_flag) { 01177 //fprintf(stderr," number of particles= %d ",n_total_particles); 01178 01179 double *sumix = (double *) malloc(sizeof(double)*n_local_part); 01180 double *sumiy = (double *) malloc(sizeof(double)*n_local_part); 01181 double *sumiz = (double *) malloc(sizeof(double)*n_local_part); 01182 01183 for (i = 0; i < n_local_part; i++){ 01184 sumix[i]=my[i]*a[2]-mz[i]*a[1]; 01185 sumiy[i]=mz[i]*a[0]-mx[i]*a[2]; 01186 sumiz[i]=mx[i]*a[1]-my[i]*a[0]; 01187 } 01188 01189 // for (i = 0; i < n_total_particles; i++){ 01190 // fprintf(stderr,"part %d, correccions torque x:%le, y:%le, z:%le\n",i,sumix[i],sumiy[i],sumiz[i]); 01191 // } 01192 01193 ip=0; 01194 for (c = 0; c < local_cells.n; c++) { 01195 np = local_cells.cell[c]->n; 01196 part = local_cells.cell[c]->part; 01197 for (i = 0; i < np; i++){ 01198 part[i].f.torque[0] -= pref*sumix[ip]; 01199 part[i].f.torque[1] -= pref*sumiy[ip]; 01200 part[i].f.torque[2] -= pref*sumiz[ip]; 01201 ip++; 01202 } 01203 } 01204 01205 01206 free(sumix); 01207 free(sumiy); 01208 free(sumiz); 01209 } 01210 #endif 01211 01212 free(mx); 01213 free(my); 01214 free(mz); 01215 01216 return en; 01217 01218 } 01219 01220 01221 /************************************************************/ 01222 void dp3m_gather_fft_grid(double* themesh) 01223 { 01224 int s_dir,r_dir,evenodd; 01225 MPI_Status status; 01226 double *tmp_ptr; 01227 01228 P3M_TRACE(fprintf(stderr,"%d: dp3m_gather_fft_grid:\n",this_node)); 01229 01230 /* direction loop */ 01231 for(s_dir=0; s_dir<6; s_dir++) { 01232 if(s_dir%2==0) r_dir = s_dir+1; 01233 else r_dir = s_dir-1; 01234 /* pack send block */ 01235 if(dp3m.sm.s_size[s_dir]>0) 01236 fft_pack_block(themesh, dp3m.send_grid, dp3m.sm.s_ld[s_dir], dp3m.sm.s_dim[s_dir], dp3m.local_mesh.dim, 1); 01237 01238 /* communication */ 01239 if(node_neighbors[s_dir] != this_node) { 01240 for(evenodd=0; evenodd<2;evenodd++) { 01241 if((node_pos[s_dir/2]+evenodd)%2==0) { 01242 if(dp3m.sm.s_size[s_dir]>0) 01243 MPI_Send(dp3m.send_grid, dp3m.sm.s_size[s_dir], MPI_DOUBLE, 01244 node_neighbors[s_dir], REQ_P3M_GATHER_D, comm_cart); 01245 } 01246 else { 01247 if(dp3m.sm.r_size[r_dir]>0) 01248 MPI_Recv(dp3m.recv_grid, dp3m.sm.r_size[r_dir], MPI_DOUBLE, 01249 node_neighbors[r_dir], REQ_P3M_GATHER_D, comm_cart, &status); 01250 } 01251 } 01252 } 01253 else { 01254 tmp_ptr = dp3m.recv_grid; 01255 dp3m.recv_grid = dp3m.send_grid; 01256 dp3m.send_grid = tmp_ptr; 01257 } 01258 /* add recv block */ 01259 if(dp3m.sm.r_size[r_dir]>0) { 01260 p3m_add_block(dp3m.recv_grid, themesh, dp3m.sm.r_ld[r_dir], dp3m.sm.r_dim[r_dir], dp3m.local_mesh.dim); 01261 } 01262 } 01263 } 01264 01265 01266 01267 /************************************************************/ 01268 01269 01270 void dp3m_spread_force_grid(double* themesh) 01271 { 01272 int s_dir,r_dir,evenodd; 01273 MPI_Status status; 01274 double *tmp_ptr; 01275 P3M_TRACE(fprintf(stderr,"%d: dipolar p3m_spread_force_grid:\n",this_node)); 01276 01277 /* direction loop */ 01278 for(s_dir=5; s_dir>=0; s_dir--) { 01279 if(s_dir%2==0) r_dir = s_dir+1; 01280 else r_dir = s_dir-1; 01281 /* pack send block */ 01282 if(dp3m.sm.s_size[s_dir]>0) 01283 fft_pack_block(themesh, dp3m.send_grid, dp3m.sm.r_ld[r_dir], dp3m.sm.r_dim[r_dir], dp3m.local_mesh.dim, 1); 01284 /* communication */ 01285 if(node_neighbors[r_dir] != this_node) { 01286 for(evenodd=0; evenodd<2;evenodd++) { 01287 if((node_pos[r_dir/2]+evenodd)%2==0) { 01288 if(dp3m.sm.r_size[r_dir]>0) 01289 MPI_Send(dp3m.send_grid, dp3m.sm.r_size[r_dir], MPI_DOUBLE, 01290 node_neighbors[r_dir], REQ_P3M_SPREAD_D, comm_cart); 01291 } 01292 else { 01293 if(dp3m.sm.s_size[s_dir]>0) 01294 MPI_Recv(dp3m.recv_grid, dp3m.sm.s_size[s_dir], MPI_DOUBLE, 01295 node_neighbors[s_dir], REQ_P3M_SPREAD_D, comm_cart, &status); 01296 } 01297 } 01298 } 01299 else { 01300 tmp_ptr = dp3m.recv_grid; 01301 dp3m.recv_grid = dp3m.send_grid; 01302 dp3m.send_grid = tmp_ptr; 01303 } 01304 /* un pack recv block */ 01305 if(dp3m.sm.s_size[s_dir]>0) { 01306 fft_unpack_block(dp3m.recv_grid, themesh, dp3m.sm.s_ld[s_dir], dp3m.sm.s_dim[s_dir], dp3m.local_mesh.dim, 1); 01307 } 01308 } 01309 } 01310 01311 01312 /*****************************************************************************/ 01313 01314 void dp3m_realloc_ca_fields(int newsize) 01315 { 01316 newsize = ((newsize + CA_INCREMENT - 1)/CA_INCREMENT)*CA_INCREMENT; 01317 if (newsize == dp3m.ca_num) return; 01318 if (newsize < CA_INCREMENT) newsize = CA_INCREMENT; 01319 01320 P3M_TRACE(fprintf(stderr,"%d: p3m_realloc_ca_fields: dipolar, old_size=%d -> new_size=%d\n",this_node,dp3m.ca_num,newsize)); 01321 dp3m.ca_num = newsize; 01322 dp3m.ca_frac = (double *)realloc(dp3m.ca_frac, dp3m.params.cao3*dp3m.ca_num*sizeof(double)); 01323 dp3m.ca_fmp = (int *)realloc(dp3m.ca_fmp, dp3m.ca_num*sizeof(int)); 01324 01325 } 01326 01327 01328 /*****************************************************************************/ 01329 01330 01331 void dp3m_calc_meshift(void) 01332 { 01333 int i; 01334 double dmesh; 01335 dmesh = (double)dp3m.params.mesh[0]; 01336 dp3m.meshift = (double *) realloc(dp3m.meshift, dp3m.params.mesh[0]*sizeof(double)); 01337 for (i=0; i<dp3m.params.mesh[0]; i++) dp3m.meshift[i] = i - dround(i/dmesh)*dmesh; 01338 } 01339 01340 01341 01342 /*****************************************************************************/ 01343 01344 01345 void dp3m_calc_differential_operator() 01346 { 01347 int i; 01348 double dmesh; 01349 01350 dmesh = (double)dp3m.params.mesh[0]; 01351 dp3m.d_op = (double *) realloc(dp3m.d_op, dp3m.params.mesh[0]*sizeof(double)); 01352 01353 for (i=0; i<dp3m.params.mesh[0]; i++) 01354 dp3m.d_op[i] = (double)i - dround((double)i/dmesh)*dmesh; 01355 01356 dp3m.d_op[dp3m.params.mesh[0]/2] = 0; 01357 } 01358 01359 /*****************************************************************************/ 01360 01361 01362 void dp3m_calc_influence_function_force() 01363 { 01364 int i,n[3],ind; 01365 int end[3]; 01366 int size=1; 01367 double fak1,fak2; 01368 double nominator[1]={0.0},denominator=0.0; 01369 01370 dp3m_calc_meshift(); 01371 01372 for(i=0;i<3;i++) { 01373 size *= dfft.plan[3].new_mesh[i]; 01374 end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i]; 01375 } 01376 dp3m.g_force = (double *) realloc(dp3m.g_force, size*sizeof(double)); 01377 fak1 = dp3m.params.mesh[0]*dp3m.params.mesh[0]*dp3m.params.mesh[0]*2.0/(box_l[0]*box_l[0]); 01378 01379 for(n[0]=dfft.plan[3].start[0]; n[0]<end[0]; n[0]++) 01380 for(n[1]=dfft.plan[3].start[1]; n[1]<end[1]; n[1]++) 01381 for(n[2]=dfft.plan[3].start[2]; n[2]<end[2]; n[2]++) { 01382 ind = (n[2]-dfft.plan[3].start[2]) + dfft.plan[3].new_mesh[2] * ((n[1]-dfft.plan[3].start[1]) + (dfft.plan[3].new_mesh[1]*(n[0]-dfft.plan[3].start[0]))); 01383 01384 if( (n[0]==0) && (n[1]==0) && (n[2]==0) ) 01385 dp3m.g_force[ind] = 0.0; 01386 else if( (n[0]%(dp3m.params.mesh[0]/2)==0) && 01387 (n[1]%(dp3m.params.mesh[0]/2)==0) && 01388 (n[2]%(dp3m.params.mesh[0]/2)==0) ) 01389 dp3m.g_force[ind] = 0.0; 01390 else { 01391 denominator = dp3m_perform_aliasing_sums_force(n,nominator); 01392 fak2 = nominator[0]; 01393 fak2 /= pow(SQR(dp3m.d_op[n[0]])+SQR(dp3m.d_op[n[1]])+SQR(dp3m.d_op[n[2]]),3) * SQR(denominator) ; 01394 dp3m.g_force[ind] = fak1*fak2; 01395 } 01396 } 01397 } 01398 01399 01400 /*****************************************************************************/ 01401 01402 double dp3m_perform_aliasing_sums_force(int n[3], double nominator[1]) 01403 { 01404 double denominator=0.0; 01405 /* lots of temporary variables... */ 01406 double sx,sy,sz,f1,f2,f3,mx,my,mz,nmx,nmy,nmz,nm2,expo; 01407 double limit = 30; 01408 double n_nm; 01409 double n_nm3; 01410 01411 nominator[0]=0.0; 01412 01413 f1 = 1.0/(double)dp3m.params.mesh[0]; 01414 f2 = SQR(PI/(dp3m.params.alpha_L)); 01415 01416 for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) { 01417 nmx = dp3m.meshift[n[0]] + dp3m.params.mesh[0]*mx; 01418 sx = pow(sinc(f1*nmx),2.0*dp3m.params.cao); 01419 for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) { 01420 nmy = dp3m.meshift[n[1]] + dp3m.params.mesh[0]*my; 01421 sy = sx*pow(sinc(f1*nmy),2.0*dp3m.params.cao); 01422 for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) { 01423 nmz = dp3m.meshift[n[2]] + dp3m.params.mesh[0]*mz; 01424 sz = sy*pow(sinc(f1*nmz),2.0*dp3m.params.cao); 01425 01426 nm2 = SQR(nmx)+SQR(nmy)+SQR(nmz); 01427 expo = f2*nm2; 01428 f3 = (expo<limit) ? sz*exp(-expo)/nm2 : 0.0; 01429 01430 n_nm = dp3m.d_op[n[0]]*nmx + dp3m.d_op[n[1]]*nmy + dp3m.d_op[n[2]]*nmz; 01431 n_nm3 = n_nm*n_nm*n_nm; 01432 01433 nominator[0] += f3*n_nm3; 01434 denominator += sz; 01435 } 01436 } 01437 } 01438 return denominator; 01439 } 01440 01441 01442 /*****************************************************************************/ 01443 01444 void dp3m_calc_influence_function_energy() 01445 { 01446 int i,n[3],ind; 01447 int end[3]; 01448 int size=1; 01449 double fak1,fak2; 01450 double nominator[1]={0.0},denominator=0.0; 01451 01452 dp3m_calc_meshift(); 01453 01454 for(i=0;i<3;i++) { 01455 size *= dfft.plan[3].new_mesh[i]; 01456 end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i]; 01457 } 01458 dp3m.g_energy = (double *) realloc(dp3m.g_energy, size*sizeof(double)); 01459 fak1 = dp3m.params.mesh[0]*dp3m.params.mesh[0]*dp3m.params.mesh[0]*2.0/(box_l[0]*box_l[0]); 01460 01461 for(n[0]=dfft.plan[3].start[0]; n[0]<end[0]; n[0]++) 01462 for(n[1]=dfft.plan[3].start[1]; n[1]<end[1]; n[1]++) 01463 for(n[2]=dfft.plan[3].start[2]; n[2]<end[2]; n[2]++) { 01464 ind = (n[2]-dfft.plan[3].start[2]) + dfft.plan[3].new_mesh[2] * ((n[1]-dfft.plan[3].start[1]) + (dfft.plan[3].new_mesh[1]*(n[0]-dfft.plan[3].start[0]))); 01465 01466 if( (n[0]==0) && (n[1]==0) && (n[2]==0) ) 01467 dp3m.g_energy[ind] = 0.0; 01468 else if( (n[0]%(dp3m.params.mesh[0]/2)==0) && 01469 (n[1]%(dp3m.params.mesh[0]/2)==0) && 01470 (n[2]%(dp3m.params.mesh[0]/2)==0) ) 01471 dp3m.g_energy[ind] = 0.0; 01472 else { 01473 denominator = dp3m_perform_aliasing_sums_energy(n,nominator); 01474 fak2 = nominator[0]; 01475 fak2 /= pow(SQR(dp3m.d_op[n[0]])+SQR(dp3m.d_op[n[1]])+SQR(dp3m.d_op[n[2]]),2) * SQR(denominator) ; 01476 dp3m.g_energy[ind] = fak1*fak2; 01477 } 01478 } 01479 } 01480 01481 /*****************************************************************************/ 01482 01483 double dp3m_perform_aliasing_sums_energy(int n[3], double nominator[1]) 01484 { 01485 double denominator=0.0; 01486 /* lots of temporary variables... */ 01487 double sx,sy,sz,f1,f2,f3,mx,my,mz,nmx,nmy,nmz,nm2,expo; 01488 double limit = 30; 01489 double n_nm; 01490 double n_nm2; 01491 01492 nominator[0]=0.0; 01493 01494 f1 = 1.0/(double)dp3m.params.mesh[0]; 01495 f2 = SQR(PI/(dp3m.params.alpha_L)); 01496 01497 for(mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) { 01498 nmx = dp3m.meshift[n[0]] + dp3m.params.mesh[0]*mx; 01499 sx = pow(sinc(f1*nmx),2.0*dp3m.params.cao); 01500 for(my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) { 01501 nmy = dp3m.meshift[n[1]] + dp3m.params.mesh[0]*my; 01502 sy = sx*pow(sinc(f1*nmy),2.0*dp3m.params.cao); 01503 for(mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) { 01504 nmz = dp3m.meshift[n[2]] + dp3m.params.mesh[0]*mz; 01505 sz = sy*pow(sinc(f1*nmz),2.0*dp3m.params.cao); 01506 01507 nm2 = SQR(nmx)+SQR(nmy)+SQR(nmz); 01508 expo = f2*nm2; 01509 f3 = (expo<limit) ? sz*exp(-expo)/nm2 : 0.0; 01510 01511 n_nm = dp3m.d_op[n[0]]*nmx + dp3m.d_op[n[1]]*nmy + dp3m.d_op[n[2]]*nmz; 01512 n_nm2 = n_nm*n_nm; 01513 nominator[0] += f3*n_nm2; 01514 denominator += sz; 01515 } 01516 } 01517 } 01518 return denominator; 01519 } 01520 01521 01522 /*****************************************************************************/ 01523 01524 01525 /************************************************ 01526 * Functions for dipoloar P3M Parameter tuning 01527 * This tuning is based on the P3M tuning of the charges 01528 which in turn is based on the P3M_tune by M. Deserno 01529 ************************************************/ 01530 01531 #define P3M_TUNE_MAX_CUTS 50 01532 /** Tune dipolar P3M parameters to desired accuracy. 01533 01534 Usage: 01535 \verbatim inter dipolar <bjerrum> p3m tune accuracy <value> [r_cut <value> mesh <value> cao <value>] \endverbatim 01536 01537 The parameters are tuned to obtain the desired accuracy in best 01538 time, by running mpi_integrate(0) for several parameter sets. 01539 01540 The function utilizes the analytic expression of the error estimate 01541 for the dipolar P3M method see JCP,2008 paper by J.J.Cerda et al in 01542 order to obtain the rms error in the force for a system of N randomly 01543 distributed particles in a cubic box. 01544 For the real space error the estimate of Kolafa/Perram is used. 01545 01546 Parameter range if not given explicit values: For \ref p3m_parameter_struct::r_cut_iL 01547 the function uses the values (\ref min_local_box_l -\ref #skin) / 01548 (n * \ref box_l), n being an integer (this implies the assumption that \ref 01549 p3m_parameter_struct::r_cut_iL is the largest cutoff in the system!). For \ref 01550 p3m_parameter_struct::mesh the function uses the two values which matches best the 01551 equation: number of mesh point = number of magnetic dipolar particles. For 01552 \ref p3m_parameter_struct::cao the function considers all possible values. 01553 01554 For each setting \ref p3m_parameter_struct::alpha_L is calculated assuming that the 01555 error contributions of real and reciprocal space should be equal. 01556 01557 After checking if the total error fulfils the accuracy goal the 01558 time needed for one force calculation (including verlet list 01559 update) is measured via \ref mpi_integrate (0). 01560 01561 The function returns a log of the performed tuning. 01562 01563 The function is based on routines for charges. 01564 */ 01565 01566 01567 /*****************************************************************************/ 01568 01569 01570 /** get the minimal error for this combination of parameters. In fact, the real space error is tuned such that it 01571 contributes half of the total error, and then the Fourier space error is calculated. Returns the error and the 01572 optimal alpha, or 0 if this combination does not work at all */ 01573 double dp3m_get_accuracy(int mesh, int cao, double r_cut_iL, double *_alpha_L, double *_rs_err, double *_ks_err) 01574 { 01575 double rs_err, ks_err; 01576 double alpha_L; 01577 P3M_TRACE(fprintf(stderr, "dp3m_get_accuracy: mesh %d, cao %d, r_cut %f ", mesh, cao, r_cut_iL)); 01578 01579 /* calc maximal real space error for setting */ 01580 01581 //Alpha cannot be zero in the dipolar case because real_space formula breaks down 01582 //Idem of the previous function tclcommand_inter_magnetic_dp3m_print_tune_parameters, here we do nothing 01583 rs_err =P3M_DIPOLAR_real_space_error(box_l[0],coulomb.Dprefactor,r_cut_iL,dp3m.sum_dip_part,dp3m.sum_mu2,0.001); 01584 01585 01586 if(M_SQRT2*rs_err > dp3m.params.accuracy) { 01587 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */ 01588 alpha_L=dp3m_rtbisection(box_l[0],coulomb.Dprefactor,r_cut_iL,dp3m.sum_dip_part,dp3m.sum_mu2, 01589 0.0001*box_l[0],5.0*box_l[0],0.0001,dp3m.params.accuracy); 01590 01591 } 01592 01593 else 01594 /* even alpha=0 is ok, however, we cannot choose it since it kills the k-space error formula. 01595 Anyways, this very likely NOT the optimal solution */ 01596 alpha_L = 0.1; 01597 01598 *_alpha_L = alpha_L; 01599 /* calculate real space and k space error for this alpha_L */ 01600 01601 rs_err = P3M_DIPOLAR_real_space_error(box_l[0],coulomb.Dprefactor,r_cut_iL,dp3m.sum_dip_part,dp3m.sum_mu2,alpha_L); 01602 ks_err = dp3m_k_space_error(box_l[0],coulomb.Dprefactor,mesh,cao,dp3m.sum_dip_part,dp3m.sum_mu2,alpha_L); 01603 01604 *_rs_err = rs_err; 01605 *_ks_err = ks_err; 01606 P3M_TRACE(fprintf(stderr, "dipolar tuning resulting: %f -> %f %f\n", alpha_L, rs_err, ks_err)); 01607 return sqrt(SQR(rs_err)+SQR(ks_err)); 01608 } 01609 01610 01611 /*****************************************************************************/ 01612 01613 /** get the optimal alpha and the corresponding computation time for fixed mesh, cao, r_cut and alpha */ 01614 static double dp3m_mcr_time(int mesh, int cao, double r_cut_iL, double alpha_L) 01615 { 01616 /* rounded up 2000/n_charges timing force evaluations */ 01617 int int_num = (1999 + dp3m.sum_dip_part)/dp3m.sum_dip_part; 01618 01619 /* broadcast p3m parameters for test run */ 01620 if (coulomb.Dmethod != DIPOLAR_P3M && coulomb.Dmethod != DIPOLAR_MDLC_P3M) 01621 coulomb.Dmethod = DIPOLAR_P3M; 01622 dp3m.params.r_cut_iL = r_cut_iL; 01623 dp3m.params.mesh[0] = dp3m.params.mesh[1] = dp3m.params.mesh[2] = mesh; 01624 dp3m.params.cao = cao; 01625 dp3m.params.alpha_L = alpha_L; 01626 dp3m_scaleby_box_l(); 01627 /* initialize p3m structures */ 01628 mpi_bcast_coulomb_params(); 01629 /* perform force calculation test */ 01630 return time_force_calc(int_num); 01631 } 01632 01633 /*****************************************************************************/ 01634 /** get the optimal alpha and the corresponding computation time for 01635 fixed mesh, cao. The r_cut is determined via a simple 01636 bisection. Returns -1 if the force evaluation does not work, -2 if 01637 there is no valid r_cut, and -3 if the charge assigment order is 01638 to large for this grid */ 01639 static double dp3m_mc_time(char **log, int mesh, int cao, 01640 double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL, 01641 double *_alpha_L, double *_accuracy) 01642 { 01643 double int_time; 01644 double r_cut_iL; 01645 double rs_err, ks_err, mesh_size, k_cut; 01646 int i, n_cells; 01647 char b[3*ES_INTEGER_SPACE + 3*ES_DOUBLE_SPACE + 128]; 01648 01649 /* initial checks. */ 01650 mesh_size = box_l[0]/(double)mesh; 01651 k_cut = mesh_size*cao/2.0; 01652 P3M_TRACE(fprintf(stderr, "dp3m_mc_time: mesh=%d, cao=%d, rmin=%f, rmax=%f\n", 01653 mesh, cao, r_cut_iL_min, r_cut_iL_max)); 01654 if(cao >= mesh || k_cut >= dmin(min_box_l,min_local_box_l) - skin) { 01655 /* print result */ 01656 sprintf(b,"%-4d %-3d cao too large for this mesh\n", mesh, cao); 01657 *log = strcat_alloc(*log, b); 01658 return -3; 01659 } 01660 01661 /* Either low and high boundary are equal (for fixed cut), or the low border is initially 0 and therefore 01662 has infinite error estimate, as required. Therefore if the high boundary fails, there is no possible r_cut */ 01663 if ((*_accuracy = dp3m_get_accuracy(mesh, cao, r_cut_iL_max, _alpha_L, &rs_err, &ks_err)) > dp3m.params.accuracy) { 01664 /* print result */ 01665 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e accuracy not achieved\n", 01666 mesh, cao, r_cut_iL_max, *_alpha_L, *_accuracy, rs_err, ks_err); 01667 *log = strcat_alloc(*log, b); 01668 return -2; 01669 } 01670 01671 for (;;) { 01672 P3M_TRACE(fprintf(stderr, "dp3m_mc_time: interval [%f,%f]\n", r_cut_iL_min, r_cut_iL_max)); 01673 r_cut_iL = 0.5*(r_cut_iL_min + r_cut_iL_max); 01674 01675 if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC) 01676 break; 01677 01678 /* bisection */ 01679 if (dp3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err) > dp3m.params.accuracy) 01680 r_cut_iL_min = r_cut_iL; 01681 else 01682 r_cut_iL_max = r_cut_iL; 01683 } 01684 /* final result is always the upper interval boundary, since only there 01685 we know that the desired minimal accuracy is obtained */ 01686 *_r_cut_iL = r_cut_iL = r_cut_iL_max; 01687 01688 /* check whether we are running P3M+DLC, and whether we leave a reasonable gap space */ 01689 if (coulomb.Dmethod == DIPOLAR_MDLC_P3M) { 01690 char *errtxt = runtime_error(128); 01691 ERROR_SPRINTF(errtxt, "{dipolar P3M: tuning when dlc needs to be fixed} "); 01692 } 01693 01694 /* check whether this radius is too large, so that we would use less cells than allowed */ 01695 n_cells = 1; 01696 for (i = 0; i < 3; i++) 01697 n_cells *= (int)(floor(local_box_l[i]/(r_cut_iL*box_l[0] + skin))); 01698 if (n_cells < min_num_cells) { 01699 P3M_TRACE(fprintf(stderr, "dp3m_mc_time: mesh %d cao %d r_cut %f reject n_cells %d\n", mesh, cao, r_cut_iL, n_cells)); 01700 /* print result */ 01701 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e radius dangerously high\n\n", 01702 mesh, cao, r_cut_iL_max, *_alpha_L, *_accuracy, rs_err, ks_err); 01703 *log = strcat_alloc(*log, b); 01704 return -2; 01705 } 01706 01707 int_time = dp3m_mcr_time(mesh, cao, r_cut_iL, *_alpha_L); 01708 if (int_time == -1) { 01709 *log = strcat_alloc(*log, "tuning failed, test integration not possible\n"); 01710 return -1; 01711 } 01712 01713 *_accuracy = dp3m_get_accuracy(mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err); 01714 01715 P3M_TRACE(fprintf(stderr, "dp3m_mc_time: mesh %d cao %d r_cut %f time %f\n", mesh, cao, r_cut_iL, int_time)); 01716 /* print result */ 01717 sprintf(b, "%-4d %-3d %.5e %.5e %.5e %.3e %.3e %-8d\n", 01718 mesh, cao, r_cut_iL, *_alpha_L, *_accuracy, rs_err, ks_err, (int)int_time); 01719 *log = strcat_alloc(*log, b); 01720 return int_time; 01721 } 01722 01723 01724 /*****************************************************************************/ 01725 01726 /** get the optimal alpha and the corresponding computation time for fixed mesh. *cao 01727 should contain an initial guess, which is then adapted by stepping up and down. Returns the time 01728 upon completion, -1 if the force evaluation does not work, and -2 if the accuracy cannot be met */ 01729 static double dp3m_m_time(char **log, int mesh, 01730 int cao_min, int cao_max, int *_cao, 01731 double r_cut_iL_min, double r_cut_iL_max, double *_r_cut_iL, 01732 double *_alpha_L, double *_accuracy) 01733 { 01734 double best_time = -1, tmp_time, tmp_r_cut_iL, tmp_alpha_L=0.0, tmp_accuracy=0.0; 01735 /* in which direction improvement is possible. Initially, we dont know it yet. */ 01736 int final_dir = 0; 01737 int cao = *_cao; 01738 01739 P3M_TRACE(fprintf(stderr, "dp3m_m_time: Dmesh=%d, Dcao_min=%d, Dcao_max=%d, Drmin=%f, Drmax=%f\n", 01740 mesh, cao_min, cao_max, r_cut_iL_min, r_cut_iL_max)); 01741 /* the initial step sets a timing mark. If there is no valid r_cut, we can only try 01742 to increase cao to increase the obtainable precision of the far formula. */ 01743 do { 01744 tmp_time = dp3m_mc_time(log, mesh, cao, r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy); 01745 /* bail out if the force evaluation is not working */ 01746 if (tmp_time == -1) return -1; 01747 /* cao is too large for this grid, but still the accuracy cannot be achieved, give up */ 01748 if (tmp_time == -3) { 01749 P3M_TRACE(fprintf(stderr, "dp3m_m_time: no possible cao found\n")); 01750 return -2; 01751 } 01752 /* we have a valid time, start optimising from there */ 01753 if (tmp_time >= 0) { 01754 best_time = tmp_time; 01755 *_r_cut_iL = tmp_r_cut_iL; 01756 *_alpha_L = tmp_alpha_L; 01757 *_accuracy = tmp_accuracy; 01758 *_cao = cao; 01759 break; 01760 } 01761 /* the required accuracy could not be obtained, try higher caos. Therefore optimisation can only be 01762 obtained with even higher caos, but not lower ones */ 01763 P3M_TRACE(fprintf(stderr, "tclcommand_inter_magnetic_p3m_print_m_time: doesn't give precision, step up\n")); 01764 cao++; 01765 final_dir = 1; 01766 } 01767 while (cao <= cao_max); 01768 /* with this mesh, the required accuracy cannot be obtained. */ 01769 if (cao > cao_max) return -2; 01770 01771 /* at the boundaries, only the opposite direction can be used for optimisation */ 01772 if (cao == cao_min) final_dir = 1; 01773 else if (cao == cao_max) final_dir = -1; 01774 01775 P3M_TRACE(fprintf(stderr, "dp3m_m_time: final constraints dir %d\n", final_dir)); 01776 01777 if (final_dir == 0) { 01778 /* check in which direction we can optimise. Both directions are possible */ 01779 double dir_times[3]; 01780 for (final_dir = -1; final_dir <= 1; final_dir += 2) { 01781 dir_times[final_dir + 1] = tmp_time = 01782 dp3m_mc_time(log, mesh, cao + final_dir, r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy); 01783 /* bail out on errors, as usual */ 01784 if (tmp_time == -1) return -1; 01785 /* in this direction, we cannot optimise, since we get into precision trouble */ 01786 if (tmp_time < 0) continue; 01787 01788 if (tmp_time < best_time) { 01789 best_time = tmp_time; 01790 *_r_cut_iL = tmp_r_cut_iL; 01791 *_alpha_L = tmp_alpha_L; 01792 *_accuracy = tmp_accuracy; 01793 *_cao = cao + final_dir; 01794 } 01795 } 01796 /* choose the direction which was optimal, if any of the two */ 01797 if (dir_times[0] == best_time) { final_dir = -1; } 01798 else if (dir_times[2] == best_time) { final_dir = 1; } 01799 else { 01800 /* no improvement in either direction, however if one is only marginally worse, we can still try*/ 01801 /* down is possible and not much worse, while up is either illegal or even worse */ 01802 if ((dir_times[0] >= 0 && dir_times[0] < best_time + P3M_TIME_GRAN) && 01803 (dir_times[2] < 0 || dir_times[2] > dir_times[0])) 01804 final_dir = -1; 01805 /* same for up */ 01806 else if ((dir_times[2] >= 0 && dir_times[2] < best_time + P3M_TIME_GRAN) && 01807 (dir_times[0] < 0 || dir_times[0] > dir_times[2])) 01808 final_dir = 1; 01809 else { 01810 /* really no chance for optimisation */ 01811 P3M_TRACE(fprintf(stderr, "dp3m_m_time: Dmesh=%d final Dcao=%d time=%f\n",mesh, cao, best_time)); 01812 return best_time; 01813 } 01814 } 01815 /* we already checked the initial cao and its neighbor */ 01816 cao += 2*final_dir; 01817 } 01818 else { 01819 /* here some constraint is active, and we only checked the initial cao itself */ 01820 cao += final_dir; 01821 } 01822 01823 P3M_TRACE(fprintf(stderr, "dp3m_m_time: optimise in direction %d\n", final_dir)); 01824 01825 /* move cao into the optimisation direction until we do not gain anymore. */ 01826 for (; cao >= cao_min && cao <= cao_max; cao += final_dir) { 01827 tmp_time = dp3m_mc_time(log, mesh, cao, r_cut_iL_min, r_cut_iL_max, 01828 &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy); 01829 /* bail out on errors, as usual */ 01830 if (tmp_time == -1) return -1; 01831 /* if we cannot meet the precision anymore, give up */ 01832 if (tmp_time < 0) break; 01833 01834 if (tmp_time < best_time) { 01835 best_time = tmp_time; 01836 *_r_cut_iL = tmp_r_cut_iL; 01837 *_alpha_L = tmp_alpha_L; 01838 *_accuracy = tmp_accuracy; 01839 *_cao = cao; 01840 } 01841 /* no hope of further optimisation */ 01842 else if (tmp_time > best_time + P3M_TIME_GRAN) 01843 break; 01844 } 01845 P3M_TRACE(fprintf(stderr, "tclcommand_inter_magnetic_p3m_print_m_time: Dmesh=%d final Dcao=%d Dr_cut=%f time=%f\n",mesh, *_cao, *_r_cut_iL, best_time)); 01846 return best_time; 01847 } 01848 01849 01850 /** Tuning of dipolar P3M. The algorithm 01851 basically determines the mesh, cao and then the real space cutoff, in this nested order. 01852 01853 For each mesh, the cao optimal for the mesh tested previously is used as an initial guess, 01854 and the algorithm tries whether increasing or decreasing it leads to a better solution. This 01855 is efficient, since the optimal cao only changes little with the meshes in general. 01856 01857 The real space cutoff for a given mesh and cao is determined via a bisection on the error estimate, 01858 which determines where the error estimate equals the required accuracy. Therefore the smallest 01859 possible, i.e. fastest real space cutoff is determined. 01860 01861 Both the search over mesh and cao stop to search in a specific direction once the computation time is 01862 significantly higher than the currently known optimum. 01863 */ 01864 01865 int dp3m_adaptive_tune(char **logger) 01866 { 01867 int mesh_max, mesh = -1, tmp_mesh; 01868 double r_cut_iL_min, r_cut_iL_max, r_cut_iL = -1, tmp_r_cut_iL=0.0; 01869 int cao_min, cao_max, cao = -1, tmp_cao; 01870 01871 double alpha_L = -1, tmp_alpha_L=0.0; 01872 double accuracy = -1, tmp_accuracy=0.0; 01873 double time_best=1e20, tmp_time; 01874 char b[3*ES_INTEGER_SPACE + 3*ES_DOUBLE_SPACE + 128]; 01875 01876 P3M_TRACE(fprintf(stderr,"%d: dp3m_adaptive_tune\n",this_node)); 01877 01878 if (skin == -1) { 01879 *logger = strcat_alloc(*logger, "p3m cannot be tuned, since the skin is not yet set"); 01880 return ES_ERROR; 01881 } 01882 01883 /* preparation */ 01884 mpi_bcast_event(P3M_COUNT_DIPOLES); 01885 01886 /* Print Status */ 01887 sprintf(b, "dipolar P3M tune parameters: Accuracy goal = %.5e\n", dp3m.params.accuracy); 01888 *logger = strcat_alloc(*logger, b); 01889 sprintf(b, "System: box_l = %.5e # charged part = %d Sum[q_i^2] = %.5e\n", 01890 box_l[0], dp3m.sum_dip_part, dp3m.sum_mu2); 01891 *logger = strcat_alloc(*logger, b); 01892 01893 if (dp3m.sum_dip_part == 0) { 01894 *logger = strcat_alloc(*logger, "no dipolar particles in the system, cannot tune dipolar P3M"); 01895 return ES_ERROR; 01896 } 01897 01898 /* parameter ranges */ 01899 if (dp3m.params.mesh[0] == 0 ) { 01900 double expo; 01901 expo = log(pow((double)dp3m.sum_dip_part,(1.0/3.0)))/log(2.0); 01902 01903 tmp_mesh = (int)(pow(2.0,(double)((int)expo))+0.1); 01904 /* this limits the tried meshes if the accuracy cannot 01905 be obtained with smaller meshes, but normally not all these 01906 meshes have to be tested */ 01907 mesh_max = tmp_mesh * 256; 01908 /* avoid using more than 1 GB of FFT arrays (per default, see config.h) */ 01909 if (mesh_max > P3M_MAX_MESH) 01910 mesh_max = P3M_MAX_MESH; 01911 } 01912 else { 01913 tmp_mesh = mesh_max = dp3m.params.mesh[0]; 01914 01915 sprintf(b, "fixed mesh %d\n", dp3m.params.mesh[0]); 01916 *logger = strcat_alloc(*logger, b); 01917 } 01918 01919 if(dp3m.params.r_cut_iL == 0.0) { 01920 r_cut_iL_min = 0; 01921 r_cut_iL_max = dmin(min_local_box_l, min_box_l/2) - skin; 01922 r_cut_iL_min *= box_l_i[0]; 01923 r_cut_iL_max *= box_l_i[0]; 01924 } 01925 else { 01926 r_cut_iL_min = r_cut_iL_max = dp3m.params.r_cut_iL; 01927 01928 sprintf(b, "fixed r_cut_iL %f\n", dp3m.params.r_cut_iL); 01929 *logger = strcat_alloc(*logger, b); 01930 } 01931 01932 if(dp3m.params.cao == 0) { 01933 cao_min = 1; 01934 cao_max = 7; 01935 cao = 3; 01936 } 01937 else { 01938 cao_min = cao_max = cao = dp3m.params.cao; 01939 01940 sprintf(b, "fixed cao %d\n", dp3m.params.cao); 01941 *logger = strcat_alloc(*logger, b); 01942 } 01943 01944 *logger = strcat_alloc(*logger, "Dmesh cao Dr_cut_iL Dalpha_L Derr Drs_err Dks_err time [ms]\n"); 01945 01946 /* mesh loop */ 01947 for (;tmp_mesh <= mesh_max; tmp_mesh *= 2) { 01948 tmp_cao = cao; 01949 tmp_time = dp3m_m_time(logger, tmp_mesh, 01950 cao_min, cao_max, &tmp_cao, 01951 r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, 01952 &tmp_alpha_L, &tmp_accuracy); 01953 /* some error occured during the tuning force evaluation */ 01954 if (tmp_time == -1) return ES_ERROR; 01955 /* this mesh does not work at all */ 01956 if (tmp_time < 0) continue; 01957 01958 /* the optimum r_cut for this mesh is the upper limit for higher meshes, 01959 everything else is slower */ 01960 r_cut_iL_max = tmp_r_cut_iL; 01961 01962 /* new optimum */ 01963 if (tmp_time < time_best) { 01964 time_best = tmp_time; 01965 mesh = tmp_mesh; 01966 cao = tmp_cao; 01967 r_cut_iL = tmp_r_cut_iL; 01968 alpha_L = tmp_alpha_L; 01969 accuracy = tmp_accuracy; 01970 } 01971 /* no hope of further optimisation */ 01972 else if (tmp_time > time_best + P3M_TIME_GRAN) 01973 break; 01974 } 01975 01976 P3M_TRACE(fprintf(stderr,"finshed tuning\n")); 01977 if(time_best == 1e20) { 01978 *logger = strcat_alloc(*logger, "failed to tune dipolar P3M parameters to required accuracy\n"); 01979 return ES_ERROR; 01980 } 01981 01982 /* set tuned p3m parameters */ 01983 dp3m.params.r_cut_iL = r_cut_iL; 01984 dp3m.params.mesh[0] = dp3m.params.mesh[1] = dp3m.params.mesh[2] = mesh; 01985 dp3m.params.cao = cao; 01986 dp3m.params.alpha_L = alpha_L; 01987 dp3m.params.accuracy = accuracy; 01988 dp3m_scaleby_box_l(); 01989 /* broadcast tuned p3m parameters */ 01990 mpi_bcast_coulomb_params(); 01991 /* Tell the user about the outcome */ 01992 sprintf(b, "\nresulting parameters:\n%-4d %-3d %.5e %.5e %.5e %-8d\n", 01993 mesh, cao, r_cut_iL, alpha_L, accuracy, (int)time_best); 01994 *logger = strcat_alloc(*logger, b); 01995 return ES_OK; 01996 } 01997 01998 /*****************************************************************************/ 01999 02000 void p3m_print_dp3m_struct(p3m_parameter_struct ps) { 02001 fprintf(stderr,"%d: dipolar p3m_parameter_struct: \n",this_node); 02002 fprintf(stderr," alpha_L=%f, r_cut_iL=%f \n", 02003 ps.alpha_L,ps.r_cut_iL); 02004 fprintf(stderr," mesh=(%d,%d,%d), mesh_off=(%.4f,%.4f,%.4f)\n", 02005 ps.mesh[0],ps.mesh[1],ps.mesh[2], 02006 ps.mesh_off[0],ps.mesh_off[1],ps.mesh_off[2]); 02007 fprintf(stderr," Dcao=%d, Dinter=%d, Depsilon=%f\n", 02008 ps.cao,ps.inter,ps.epsilon); 02009 fprintf(stderr," Dcao_cut=(%f,%f,%f)\n", 02010 ps.cao_cut[0],ps.cao_cut[1],ps.cao_cut[2]); 02011 fprintf(stderr," Da=(%f,%f,%f), Dai=(%f,%f,%f)\n", 02012 ps.a[0],ps.a[1],ps.a[2],ps.ai[0],ps.ai[1],ps.ai[2]); 02013 } 02014 02015 /*****************************************************************************/ 02016 02017 void dp3m_count_magnetic_particles() 02018 { 02019 Cell *cell; 02020 Particle *part; 02021 int i,c,np; 02022 double node_sums[2], tot_sums[2]; 02023 02024 for(i=0;i<2;i++) 02025 { node_sums[i]=0.0; tot_sums[i]=0.0;} 02026 02027 for (c = 0; c < local_cells.n; c++) { 02028 cell = local_cells.cell[c]; 02029 part = cell->part; 02030 np = cell->n; 02031 for(i=0;i<np;i++) { 02032 if( part[i].p.dipm != 0.0 ) { 02033 node_sums[0] += SQR(part[i].r.dip[0]) 02034 +SQR(part[i].r.dip[1]) 02035 +SQR(part[i].r.dip[2]); 02036 node_sums[1] += 1.0; 02037 } 02038 } 02039 } 02040 02041 MPI_Allreduce(node_sums, tot_sums, 2, MPI_DOUBLE, MPI_SUM, comm_cart); 02042 dp3m.sum_mu2 = tot_sums[0]; 02043 dp3m.sum_dip_part = (int)(tot_sums[1]+0.1); 02044 } 02045 02046 02047 02048 /*****************************************************************************/ 02049 02050 /* Following are the two functions for computing the error in dipolar P3M and 02051 tune the parameters to minimize the time with the desired accuracy. 02052 02053 02054 This functions are called by the functions: dp3m_get_accuracy() and 02055 tclcommand_inter_magnetic_dp3m_print_tune_parameters. 02056 02057 */ 02058 02059 02060 /*****************************************************************************/ 02061 02062 02063 02064 02065 static double dp3m_k_space_error(double box_size, double prefac, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L) 02066 { 02067 int nx, ny, nz; 02068 double he_q = 0.0, mesh_i = 1./mesh, alpha_L_i = 1./alpha_L; 02069 double alias1, alias2, n2, cs; 02070 02071 for (nx=-mesh/2; nx<mesh/2; nx++) 02072 for (ny=-mesh/2; ny<mesh/2; ny++) 02073 for (nz=-mesh/2; nz<mesh/2; nz++) 02074 if((nx!=0) || (ny!=0) || (nz!=0)) { 02075 n2 = SQR(nx) + SQR(ny) + SQR(nz); 02076 cs = p3m_analytic_cotangent_sum(nx,mesh_i,cao)* 02077 p3m_analytic_cotangent_sum(ny,mesh_i,cao)* 02078 p3m_analytic_cotangent_sum(nz,mesh_i,cao); 02079 dp3m_tune_aliasing_sums(nx,ny,nz,mesh,mesh_i,cao,alpha_L_i,&alias1,&alias2); 02080 double d = alias1 - SQR(alias2/cs) / (n2*n2*n2); 02081 /* at high precisions, d can become negative due to extinction; 02082 also, don't take values that have no significant digits left*/ 02083 if (d > 0 && (fabs(d/alias1) > ROUND_ERROR_PREC)) 02084 he_q += d; 02085 } 02086 02087 return 8.*PI*PI/3.*sum_q2*sqrt(he_q/(double)n_c_part) / (box_size*box_size*box_size*box_size); 02088 } 02089 02090 02091 void dp3m_tune_aliasing_sums(int nx, int ny, int nz, int mesh, double mesh_i, int cao, double alpha_L_i, double *alias1, double *alias2) 02092 { 02093 int mx,my,mz; 02094 double nmx,nmy,nmz; 02095 double fnmx,fnmy,fnmz; 02096 02097 double ex,ex2,nm2,U2,factor1; 02098 02099 factor1 = SQR(PI*alpha_L_i); 02100 02101 *alias1 = *alias2 = 0.0; 02102 for (mx=-P3M_BRILLOUIN; mx<=P3M_BRILLOUIN; mx++) { 02103 fnmx = mesh_i * (nmx = nx + mx*mesh); 02104 for (my=-P3M_BRILLOUIN; my<=P3M_BRILLOUIN; my++) { 02105 fnmy = mesh_i * (nmy = ny + my*mesh); 02106 for (mz=-P3M_BRILLOUIN; mz<=P3M_BRILLOUIN; mz++) { 02107 fnmz = mesh_i * (nmz = nz + mz*mesh); 02108 02109 nm2 = SQR(nmx) + SQR(nmy) + SQR(nmz); 02110 ex2 = SQR( ex = exp(-factor1*nm2) ); 02111 02112 U2 = pow(sinc(fnmx)*sinc(fnmy)*sinc(fnmz), 2.0*cao); 02113 02114 *alias1 += ex2 * nm2; 02115 *alias2 += U2 * ex * pow((nx*nmx + ny*nmy + nz*nmz),3.) / nm2; 02116 } 02117 } 02118 } 02119 } 02120 02121 02122 02123 02124 02125 //---------------------------------------------------------- 02126 // Function used to calculate the value of the errors 02127 // for the REAL part of the force in terms of the Spliting parameter alpha of Ewald 02128 // based on the formulas 33, the paper of Zuowei-HolmJCP, 115,6351,(2001). 02129 02130 // Please, notice than in this more refined approach we don't use 02131 // formulas 37, but 33 which mantains all the powers in alpha 02132 02133 //------------------------------------------------------------ 02134 02135 02136 02137 02138 double P3M_DIPOLAR_real_space_error(double box_size, double prefac, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L) 02139 { 02140 double d_error_f,d_cc,d_dc,d_rcut2,d_con; 02141 double d_a2,d_c,d_RCUT; 02142 02143 02144 02145 02146 d_RCUT=r_cut_iL*box_size; 02147 d_rcut2=d_RCUT*d_RCUT; 02148 02149 02150 d_a2=alpha_L*alpha_L/(box_size*box_size); 02151 02152 d_c =sum_q2*exp(-d_a2*d_RCUT*d_RCUT); 02153 02154 d_cc=4.0*d_a2*d_a2*d_rcut2*d_rcut2+6.0*d_a2*d_rcut2+3.0; 02155 02156 02157 d_dc=8.0*d_a2*d_a2*d_a2*d_rcut2*d_rcut2*d_rcut2+20.0*d_a2*d_a2*d_rcut2*d_rcut2 \ 02158 +30*d_a2*d_rcut2+15.0; 02159 02160 d_con=1.0/sqrt(box_size*box_size*box_size*d_a2*d_a2*d_rcut2*d_rcut2*d_rcut2*d_rcut2*d_RCUT*(double)n_c_part); 02161 02162 02163 d_error_f=d_c*d_con*sqrt((13./6.)*d_cc*d_cc+(2./15.)*d_dc*d_dc-(13./15.)*d_cc*d_dc); 02164 02165 02166 return d_error_f; 02167 } 02168 02169 02170 02171 /*****************************************************************************/ 02172 02173 02174 // Using bisection find the root of a function "func-tuned_accuracy/sqrt(2.)" known to lie 02175 //between x1 and x2. The root, returned as rtbis, will be refined 02176 //until its accuracy is +-xacc. 02177 02178 double dp3m_rtbisection( double box_size, double prefac, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy) 02179 { 02180 int j; 02181 double dx,f,fmid,xmid,rtb,constant,JJ_RTBIS_MAX=40; 02182 02183 constant=tuned_accuracy/sqrt(2.); 02184 02185 02186 f=P3M_DIPOLAR_real_space_error(box_size,prefac,r_cut_iL,n_c_part,sum_q2, x1)-constant; 02187 fmid=P3M_DIPOLAR_real_space_error(box_size,prefac,r_cut_iL,n_c_part,sum_q2, x2)-constant; 02188 if(f*fmid >=0.0) fprintf(stderr,"Root must be bracketed for bisection in dp3m_rtbisection \n"); 02189 rtb = f < 0.0 ? (dx=x2-x1,x1) : (dx=x1-x2,x2); // Orient the search dx, and set rtb to x1 or x2 ... 02190 for (j=1;j<=JJ_RTBIS_MAX;j++){ 02191 fmid=P3M_DIPOLAR_real_space_error(box_size,prefac,r_cut_iL,n_c_part,sum_q2, xmid=rtb+(dx *= 0.5))-constant; 02192 if(fmid<=0.0) rtb=xmid; 02193 if(fabs(dx) < xacc || fmid == 0.0) return rtb; 02194 } 02195 fprintf(stderr,"Too many bisections in JJ_rtbissection \n"); 02196 return -9999999.9999; 02197 02198 } 02199 02200 02201 02202 /************************************************************/ 02203 02204 02205 void dp3m_calc_lm_ld_pos() { 02206 int i; 02207 for(i=0;i<3;i++) { 02208 dp3m.local_mesh.ld_pos[i] = (dp3m.local_mesh.ld_ind[i]+ dp3m.params.mesh_off[i])*dp3m.params.a[i]; 02209 } 02210 } 02211 02212 02213 02214 /************************************************************/ 02215 02216 02217 void dp3m_init_a_ai_cao_cut() { 02218 int i; 02219 for(i=0;i<3;i++) { 02220 dp3m.params.ai[i] = (double)dp3m.params.mesh[i]/box_l[i]; 02221 dp3m.params.a[i] = 1.0/dp3m.params.ai[i]; 02222 dp3m.params.cao_cut[i] = 0.5*dp3m.params.a[i]*dp3m.params.cao; 02223 } 02224 } 02225 02226 02227 02228 /************************************************************/ 02229 02230 void dp3m_calc_local_ca_mesh() { 02231 int i; 02232 int ind[3]; 02233 /* total skin size */ 02234 double full_skin[3]; 02235 02236 for(i=0;i<3;i++) 02237 full_skin[i]= dp3m.params.cao_cut[i]+skin+dp3m.params.additional_mesh[i]; 02238 02239 /* inner left down grid point (global index) */ 02240 for(i=0;i<3;i++) dp3m.local_mesh.in_ld[i] = (int)ceil(my_left[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i]); 02241 /* inner up right grid point (global index) */ 02242 for(i=0;i<3;i++) dp3m.local_mesh.in_ur[i] = (int)floor(my_right[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i]); 02243 02244 /* correct roundof errors at boundary */ 02245 for(i=0;i<3;i++) { 02246 if((my_right[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i])-dp3m.local_mesh.in_ur[i]<ROUND_ERROR_PREC) dp3m.local_mesh.in_ur[i]--; 02247 if(1.0+(my_left[i]*dp3m.params.ai[i]-dp3m.params.mesh_off[i])-dp3m.local_mesh.in_ld[i]<ROUND_ERROR_PREC) dp3m.local_mesh.in_ld[i]--; 02248 } 02249 /* inner grid dimensions */ 02250 for(i=0;i<3;i++) dp3m.local_mesh.inner[i] = dp3m.local_mesh.in_ur[i] - dp3m.local_mesh.in_ld[i] + 1; 02251 /* index of left down grid point in global mesh */ 02252 for(i=0;i<3;i++) 02253 dp3m.local_mesh.ld_ind[i]=(int)ceil((my_left[i]-full_skin[i])*dp3m.params.ai[i]-dp3m.params.mesh_off[i]); 02254 /* spacial position of left down mesh point */ 02255 dp3m_calc_lm_ld_pos(); 02256 /* left down margin */ 02257 for(i=0;i<3;i++) dp3m.local_mesh.margin[i*2] = dp3m.local_mesh.in_ld[i]-dp3m.local_mesh.ld_ind[i]; 02258 /* up right grid point */ 02259 for(i=0;i<3;i++) ind[i]=(int)floor((my_right[i]+full_skin[i])*dp3m.params.ai[i]-dp3m.params.mesh_off[i]); 02260 /* correct roundof errors at up right boundary */ 02261 for(i=0;i<3;i++) 02262 if(((my_right[i]+full_skin[i])*dp3m.params.ai[i]-dp3m.params.mesh_off[i])-ind[i]==0) ind[i]--; 02263 /* up right margin */ 02264 for(i=0;i<3;i++) dp3m.local_mesh.margin[(i*2)+1] = ind[i] - dp3m.local_mesh.in_ur[i]; 02265 02266 /* grid dimension */ 02267 dp3m.local_mesh.size=1; 02268 for(i=0;i<3;i++) {dp3m.local_mesh.dim[i] = ind[i] - dp3m.local_mesh.ld_ind[i] + 1; dp3m.local_mesh.size*=dp3m.local_mesh.dim[i];} 02269 /* reduce inner grid indices from global to local */ 02270 for(i=0;i<3;i++) dp3m.local_mesh.in_ld[i] = dp3m.local_mesh.margin[i*2]; 02271 for(i=0;i<3;i++) dp3m.local_mesh.in_ur[i] = dp3m.local_mesh.margin[i*2]+dp3m.local_mesh.inner[i]; 02272 02273 dp3m.local_mesh.q_2_off = dp3m.local_mesh.dim[2] - dp3m.params.cao; 02274 dp3m.local_mesh.q_21_off = dp3m.local_mesh.dim[2] * (dp3m.local_mesh.dim[1] - dp3m.params.cao); 02275 02276 } 02277 02278 /*****************************************************************************/ 02279 02280 02281 int dp3m_sanity_checks_boxl() { 02282 char *errtxt; 02283 int i, ret = 0; 02284 for(i=0;i<3;i++) { 02285 /* check k-space cutoff */ 02286 if(dp3m.params.cao_cut[i] >= 0.5*box_l[i]) { 02287 errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE); 02288 ERROR_SPRINTF(errtxt,"{039 dipolar P3M_init: k-space cutoff %g is larger than half of box dimension %g} ",dp3m.params.cao_cut[i],box_l[i]); 02289 ret = 1; 02290 } 02291 if(dp3m.params.cao_cut[i] >= local_box_l[i]) { 02292 errtxt = runtime_error(128 + 2*ES_DOUBLE_SPACE); 02293 ERROR_SPRINTF(errtxt,"{040 dipolar P3M_init: k-space cutoff %g is larger than local box dimension %g} ",dp3m.params.cao_cut[i],local_box_l[i]); 02294 ret = 1; 02295 } 02296 } 02297 return ret; 02298 } 02299 02300 /*****************************************************************************/ 02301 02302 02303 int dp3m_sanity_checks() 02304 { 02305 char *errtxt; 02306 int ret = 0; 02307 02308 if (!PERIODIC(0) || !PERIODIC(1) || !PERIODIC(2)) { 02309 errtxt = runtime_error(128); 02310 ERROR_SPRINTF(errtxt, "{041 dipolar P3M requires periodicity 1 1 1} "); 02311 ret = 1; 02312 } 02313 /* 02314 if (n_nodes != 1) { 02315 errtxt = runtime_error(128); 02316 ERROR_SPRINTF(errtxt, "{110 dipolar P3M does not run in parallel} "); 02317 ret = 1; 02318 } */ 02319 if (cell_structure.type != CELL_STRUCTURE_DOMDEC) { 02320 errtxt = runtime_error(128); 02321 ERROR_SPRINTF(errtxt, "{042 dipolar P3M at present requires the domain decomposition cell system} "); 02322 ret = 1; 02323 } 02324 02325 if( (box_l[0] != box_l[1]) || (box_l[1] != box_l[2]) ) { 02326 errtxt = runtime_error(128); 02327 ERROR_SPRINTF(errtxt,"{043 dipolar P3M requires a cubic box} "); 02328 ret = 1; 02329 } 02330 02331 if( (dp3m.params.mesh[0] != dp3m.params.mesh[1]) || (dp3m.params.mesh[1] != dp3m.params.mesh[2]) ) { 02332 errtxt = runtime_error(128); 02333 ERROR_SPRINTF(errtxt, "{044 dipolar P3M requires a cubic mesh} "); 02334 ret = 1; 02335 } 02336 02337 if (dp3m_sanity_checks_boxl()) ret = 1; 02338 02339 if (skin == -1) { 02340 errtxt = runtime_error(128); 02341 ERROR_SPRINTF(errtxt,"{047 dipolar P3M_init: skin is not yet set} "); 02342 ret = 1; 02343 } 02344 02345 if( dp3m.params.mesh[0] == 0) { 02346 errtxt = runtime_error(128); 02347 ERROR_SPRINTF(errtxt,"{045 dipolar P3M_init: mesh size is not yet set} "); 02348 ret = 1; 02349 } 02350 if( dp3m.params.cao == 0) { 02351 errtxt = runtime_error(128); 02352 ERROR_SPRINTF(errtxt,"{046 dipolar P3M_init: cao is not yet set} "); 02353 ret = 1; 02354 } 02355 if(node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) { 02356 errtxt = runtime_error(128); 02357 ERROR_SPRINTF(errtxt,"{046 dipolar P3M_init: node grid must be sorted, largest first} "); 02358 ret = 1; 02359 } 02360 02361 return ret; 02362 } 02363 02364 02365 /*****************************************************************************/ 02366 02367 02368 void dp3m_calc_send_mesh() 02369 { 02370 int i,j,evenodd; 02371 int done[3]={0,0,0}; 02372 MPI_Status status; 02373 /* send grids */ 02374 for(i=0;i<3;i++) { 02375 for(j=0;j<3;j++) { 02376 /* left */ 02377 dp3m.sm.s_ld[i*2][j] = 0 + done[j]*dp3m.local_mesh.margin[j*2]; 02378 if(j==i) dp3m.sm.s_ur[i*2][j] = dp3m.local_mesh.margin[j*2]; 02379 else dp3m.sm.s_ur[i*2][j] = dp3m.local_mesh.dim[j]-done[j]*dp3m.local_mesh.margin[(j*2)+1]; 02380 /* right */ 02381 if(j==i) dp3m.sm.s_ld[(i*2)+1][j] = dp3m.local_mesh.in_ur[j]; 02382 else dp3m.sm.s_ld[(i*2)+1][j] = 0 + done[j]*dp3m.local_mesh.margin[j*2]; 02383 dp3m.sm.s_ur[(i*2)+1][j] = dp3m.local_mesh.dim[j] - done[j]*dp3m.local_mesh.margin[(j*2)+1]; 02384 } 02385 done[i]=1; 02386 } 02387 dp3m.sm.max=0; 02388 for(i=0;i<6;i++) { 02389 dp3m.sm.s_size[i] = 1; 02390 for(j=0;j<3;j++) { 02391 dp3m.sm.s_dim[i][j] = dp3m.sm.s_ur[i][j]-dp3m.sm.s_ld[i][j]; 02392 dp3m.sm.s_size[i] *= dp3m.sm.s_dim[i][j]; 02393 } 02394 if(dp3m.sm.s_size[i]>dp3m.sm.max) dp3m.sm.max=dp3m.sm.s_size[i]; 02395 } 02396 /* communication */ 02397 for(i=0;i<6;i++) { 02398 if(i%2==0) j = i+1; 02399 else j = i-1; 02400 if(node_neighbors[i] != this_node) { 02401 /* two step communication: first all even positions than all odd */ 02402 for(evenodd=0; evenodd<2;evenodd++) { 02403 if((node_pos[i/2]+evenodd)%2==0) 02404 MPI_Send(&(dp3m.local_mesh.margin[i]), 1, MPI_INT, 02405 node_neighbors[i],REQ_P3M_INIT_D,comm_cart); 02406 else 02407 MPI_Recv(&(dp3m.local_mesh.r_margin[j]), 1, MPI_INT, 02408 node_neighbors[j],REQ_P3M_INIT_D,comm_cart,&status); 02409 } 02410 } 02411 else { 02412 dp3m.local_mesh.r_margin[j] = dp3m.local_mesh.margin[i]; 02413 } 02414 } 02415 /* recv grids */ 02416 for(i=0;i<3;i++) 02417 for(j=0;j<3;j++) { 02418 if(j==i) { 02419 dp3m.sm.r_ld[ i*2 ][j] = dp3m.sm.s_ld[ i*2 ][j] + dp3m.local_mesh.margin[2*j]; 02420 dp3m.sm.r_ur[ i*2 ][j] = dp3m.sm.s_ur[ i*2 ][j] + dp3m.local_mesh.r_margin[2*j]; 02421 dp3m.sm.r_ld[(i*2)+1][j] = dp3m.sm.s_ld[(i*2)+1][j] - dp3m.local_mesh.r_margin[(2*j)+1]; 02422 dp3m.sm.r_ur[(i*2)+1][j] = dp3m.sm.s_ur[(i*2)+1][j] - dp3m.local_mesh.margin[(2*j)+1]; 02423 } 02424 else { 02425 dp3m.sm.r_ld[ i*2 ][j] = dp3m.sm.s_ld[ i*2 ][j]; 02426 dp3m.sm.r_ur[ i*2 ][j] = dp3m.sm.s_ur[ i*2 ][j]; 02427 dp3m.sm.r_ld[(i*2)+1][j] = dp3m.sm.s_ld[(i*2)+1][j]; 02428 dp3m.sm.r_ur[(i*2)+1][j] = dp3m.sm.s_ur[(i*2)+1][j]; 02429 } 02430 } 02431 for(i=0;i<6;i++) { 02432 dp3m.sm.r_size[i] = 1; 02433 for(j=0;j<3;j++) { 02434 dp3m.sm.r_dim[i][j] = dp3m.sm.r_ur[i][j]-dp3m.sm.r_ld[i][j]; 02435 dp3m.sm.r_size[i] *= dp3m.sm.r_dim[i][j]; 02436 } 02437 if(dp3m.sm.r_size[i]>dp3m.sm.max) dp3m.sm.max=dp3m.sm.r_size[i]; 02438 } 02439 02440 02441 } 02442 02443 02444 02445 /************************************************/ 02446 02447 void dp3m_scaleby_box_l() { 02448 if (coulomb.Dbjerrum == 0.0) { 02449 return; 02450 } 02451 02452 dp3m.params.r_cut = dp3m.params.r_cut_iL* box_l[0]; 02453 dp3m.params.alpha = dp3m.params.alpha_L * box_l_i[0]; 02454 dp3m_init_a_ai_cao_cut(); 02455 dp3m_calc_lm_ld_pos(); 02456 dp3m_sanity_checks_boxl(); 02457 02458 dp3m_calc_influence_function_force(); 02459 dp3m_calc_influence_function_energy(); 02460 } 02461 02462 /*****************************************************************************/ 02463 02464 02465 /* fucntion to give the dipolar-P3M energy correction -------*/ 02466 void dp3m_compute_constants_energy_dipolar() { 02467 double Eself, Ukp3m; 02468 02469 if (dp3m.energy_correction != 0.0) 02470 return; 02471 02472 P3M_TRACE(fprintf(stderr, "%d: dp3m_compute_constants_energy_dipolar().\n", this_node)); 02473 02474 double volume=box_l[0]*box_l[1]*box_l[2]; 02475 Ukp3m=dp3m_average_dipolar_self_energy(box_l[0],dp3m.params.mesh[0])*volume; 02476 02477 P3M_TRACE(fprintf(stderr, "%d: Average Dipolar Energy = %lf.\n", this_node, Ukp3m)); 02478 02479 Eself=-(2*pow(dp3m.params.alpha_L,3) * wupii/3.0); 02480 02481 dp3m.energy_correction = - dp3m.sum_mu2*(Ukp3m+Eself+2.*PI/3.); 02482 } 02483 02484 /*****************************************************************************/ 02485 02486 02487 /*****************************************************************************/ 02488 02489 #endif /* DP3M */ 02490
1.7.5.1