![]() |
ESPResSo 3.2.0-159-gf5c8922-git
Extensible Simulation Package for Soft Matter Research
|
00001 /* 00002 Copyright (C) 2010,2011,2012,2013 The ESPResSo project 00003 Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 00004 Max-Planck-Institute for Polymer Research, Theory Group 00005 00006 This file is part of ESPResSo. 00007 00008 ESPResSo is free software: you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation, either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 ESPResSo is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with this program. If not, see <http://www.gnu.org/licenses/>. 00020 */ 00021 /** \file fft-common.c 00022 * 00023 * Routines, row decomposition, data structures and communication for the 3D-FFT. 00024 * 00025 */ 00026 #include "fft-common.h" 00027 00028 #if defined(P3M) || defined(DP3M) 00029 00030 #include <string.h> 00031 #include <fftw3.h> 00032 #include <mpi.h> 00033 00034 #include "utils.h" 00035 #include "communication.h" 00036 00037 00038 void fft_common_pre_init(fft_data_struct *fft) 00039 { 00040 for(int i=0;i<4;i++) { 00041 fft->plan[i].group = malloc(1*n_nodes*sizeof(int)); 00042 fft->plan[i].send_block = NULL; 00043 fft->plan[i].send_size = NULL; 00044 fft->plan[i].recv_block = NULL; 00045 fft->plan[i].recv_size = NULL; 00046 } 00047 00048 fft->init_tag = 0; 00049 fft->max_comm_size = 0; 00050 fft->max_mesh_size = 0; 00051 fft->send_buf = NULL; 00052 fft->recv_buf = NULL; 00053 fft->data_buf = NULL; 00054 } 00055 00056 void fft_pack_block(double *in, double *out, int start[3], int size[3], int dim[3], int element) 00057 { 00058 /* mid and slow changing indices */ 00059 int m,s; 00060 /* linear index of in grid, linear index of out grid */ 00061 int li_in,li_out=0; 00062 /* copy size */ 00063 int copy_size; 00064 /* offsets for indizes in input grid */ 00065 int m_in_offset,s_in_offset; 00066 /* offsets for indizes in output grid */ 00067 int m_out_offset; 00068 00069 copy_size = element * size[2] * sizeof(double); 00070 m_in_offset = element * dim[2]; 00071 s_in_offset = element * (dim[2] * (dim[1] - size[1])); 00072 m_out_offset = element * size[2]; 00073 li_in = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0])); 00074 00075 for(s=0 ;s<size[0]; s++) { 00076 for(m=0; m<size[1]; m++) { 00077 memcpy(&(out[li_out]), &(in[li_in]), copy_size); 00078 li_in += m_in_offset; 00079 li_out += m_out_offset; 00080 } 00081 li_in += s_in_offset; 00082 } 00083 } 00084 00085 void fft_pack_block_permute1(double *in, double *out, int start[3], int size[3], 00086 int dim[3], int element) 00087 { 00088 /* slow,mid and fast changing indices for input grid */ 00089 int s,m,f,e; 00090 /* linear index of in grid, linear index of out grid */ 00091 int li_in,li_out=0; 00092 /* offsets for indizes in input grid */ 00093 int m_in_offset,s_in_offset; 00094 /* offset for mid changing indices of output grid */ 00095 int m_out_offset; 00096 00097 m_in_offset = element * (dim[2] - size[2]); 00098 s_in_offset = element * (dim[2] * (dim[1] - size[1])); 00099 m_out_offset = (element * size[0]) - element; 00100 li_in = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0])); 00101 00102 for(s=0 ;s<size[0]; s++) { /* fast changing out */ 00103 li_out = element*s; 00104 for(m=0; m<size[1]; m++) { /* slow changing out */ 00105 for(f=0 ;f<size[2]; f++) { /* mid changing out */ 00106 for(e=0; e<element; e++) out[li_out++] = in[li_in++]; 00107 li_out += m_out_offset; 00108 } 00109 li_in += m_in_offset; 00110 } 00111 li_in += s_in_offset; 00112 } 00113 00114 } 00115 00116 void fft_pack_block_permute2(double *in, double *out, int start[3], int size[3], 00117 int dim[3],int element) 00118 { 00119 /* slow,mid and fast changing indices for input grid */ 00120 int s,m,f,e; 00121 /* linear index of in grid, linear index of out grid */ 00122 int li_in,li_out=0; 00123 /* offsets for indizes in input grid */ 00124 int m_in_offset,s_in_offset; 00125 /* offset for slow changing index of output grid */ 00126 int s_out_offset; 00127 /* start index for mid changing index of output grid */ 00128 int m_out_start; 00129 00130 m_in_offset = element * (dim[2]-size[2]); 00131 s_in_offset = element * (dim[2] * (dim[1]-size[1])); 00132 s_out_offset = (element * size[0] * size[1]) - element; 00133 li_in = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0])); 00134 00135 for(s=0 ;s<size[0]; s++) { /* mid changing out */ 00136 m_out_start = element*(s * size[1]); 00137 for(m=0; m<size[1]; m++) { /* fast changing out */ 00138 li_out = m_out_start + element*m; 00139 for(f=0 ;f<size[2]; f++) { /* slow changing out */ 00140 for(e=0; e<element; e++) out[li_out++] = in[li_in++]; 00141 li_out += s_out_offset; 00142 } 00143 li_in += m_in_offset; 00144 } 00145 li_in += s_in_offset; 00146 } 00147 00148 } 00149 00150 void fft_unpack_block(double *in, double *out, int start[3], int size[3], 00151 int dim[3], int element) 00152 { 00153 /* mid and slow changing indices */ 00154 int m,s; 00155 /* linear index of in grid, linear index of out grid */ 00156 int li_in=0,li_out; 00157 /* copy size */ 00158 int copy_size; 00159 /* offset for indizes in input grid */ 00160 int m_in_offset; 00161 /* offsets for indizes in output grid */ 00162 int m_out_offset,s_out_offset; 00163 00164 copy_size = element * (size[2] * sizeof(double)); 00165 m_out_offset = element * dim[2]; 00166 s_out_offset = element * (dim[2] * (dim[1] - size[1])); 00167 m_in_offset = element * size[2]; 00168 li_out = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0])); 00169 00170 for(s=0 ;s<size[0]; s++) { 00171 for(m=0; m<size[1]; m++) { 00172 memcpy(&(out[li_out]), &(in[li_in]), copy_size); 00173 li_in += m_in_offset; 00174 li_out += m_out_offset; 00175 } 00176 li_out += s_out_offset; 00177 } 00178 00179 } 00180 00181 /************************************************ 00182 * privat functions 00183 ************************************************/ 00184 00185 int fft_find_comm_groups(int grid1[3], int grid2[3], int *node_list1, int *node_list2, 00186 int *group, int *pos, int *my_pos) 00187 { 00188 int i; 00189 /* communication group cell size on grid1 and grid2 */ 00190 int s1[3], s2[3]; 00191 /* The communication group cells build the same super grid on grid1 and grid2 */ 00192 int ds[3]; 00193 /* communication group size */ 00194 int g_size=1; 00195 /* comm. group cell index */ 00196 int gi[3]; 00197 /* position of a node in a grid */ 00198 int p1[3], p2[3]; 00199 /* node identity */ 00200 int n; 00201 /* this_node position in the communication group. */ 00202 int c_pos=-1; 00203 /* flag for group identification */ 00204 int my_group=0; 00205 00206 FFT_TRACE(fprintf(stderr,"%d: fft_find_comm_groups:\n",this_node)); 00207 FFT_TRACE(fprintf(stderr,"%d: for grid1=(%d,%d,%d) and grids=(%d,%d,%d)\n", 00208 this_node,grid1[0],grid1[1],grid1[2],grid2[0],grid2[1],grid2[2])); 00209 00210 /* calculate dimension of comm. group cells for both grids */ 00211 if( (grid1[0]*grid1[1]*grid1[2]) != (grid2[0]*grid2[1]*grid2[2]) ) return -1; /* unlike number of nodes */ 00212 for(i=0;i<3;i++) { 00213 s1[i] = grid1[i] / grid2[i]; 00214 if(s1[i] == 0) s1[i] = 1; 00215 else if(grid1[i] != grid2[i]*s1[i]) return -1; /* grids do not match!!! */ 00216 00217 s2[i] = grid2[i] / grid1[i]; 00218 if(s2[i] == 0) s2[i] = 1; 00219 else if(grid2[i] != grid1[i]*s2[i]) return -1; /* grids do not match!!! */ 00220 00221 ds[i] = grid2[i] / s2[i]; 00222 g_size *= s2[i]; 00223 } 00224 00225 /* calc node_list2 */ 00226 /* loop through all comm. group cells */ 00227 for(gi[2] = 0; gi[2] < ds[2]; gi[2]++) 00228 for(gi[1] = 0; gi[1] < ds[1]; gi[1]++) 00229 for(gi[0] = 0; gi[0] < ds[0]; gi[0]++) { 00230 /* loop through all nodes in that comm. group cell */ 00231 for(i=0;i<g_size;i++) { 00232 p1[0] = (gi[0]*s1[0]) + (i%s1[0]); 00233 p1[1] = (gi[1]*s1[1]) + ((i/s1[0])%s1[1]); 00234 p1[2] = (gi[2]*s1[2]) + (i/(s1[0]*s1[1])); 00235 00236 p2[0] = (gi[0]*s2[0]) + (i%s2[0]); 00237 p2[1] = (gi[1]*s2[1]) + ((i/s2[0])%s2[1]); 00238 p2[2] = (gi[2]*s2[2]) + (i/(s2[0]*s2[1])); 00239 00240 n = node_list1[ get_linear_index(p1[0],p1[1],p1[2],grid1) ]; 00241 node_list2[ get_linear_index(p2[0],p2[1],p2[2],grid2) ] = n ; 00242 00243 pos[3*n+0] = p2[0]; pos[3*n+1] = p2[1]; pos[3*n+2] = p2[2]; 00244 if(my_group==1) group[i] = n; 00245 if(n==this_node && my_group==0) { 00246 my_group = 1; 00247 c_pos = i; 00248 my_pos[0] = p2[0]; my_pos[1] = p2[1]; my_pos[2] = p2[2]; 00249 i=-1; /* restart the loop */ 00250 } 00251 } 00252 my_group=0; 00253 } 00254 00255 /* permute comm. group according to the nodes position in the group */ 00256 /* This is necessary to have matching node pairs during communication! */ 00257 while( c_pos>0 ) { 00258 n=group[g_size-1]; 00259 for(i=g_size-1; i>0; i--) group[i] = group[i-1]; 00260 group[0] = n; 00261 c_pos--; 00262 } 00263 return g_size; 00264 } 00265 00266 int fft_calc_local_mesh(int n_pos[3], int n_grid[3], int mesh[3], double mesh_off[3], 00267 int loc_mesh[3], int start[3]) 00268 { 00269 int i, last[3], size=1; 00270 00271 for(i=0;i<3;i++) { 00272 start[i] = (int)ceil( (mesh[i]/(double)n_grid[i])*n_pos[i] - mesh_off[i] ); 00273 last[i] = (int)floor((mesh[i]/(double)n_grid[i])*(n_pos[i]+1) - mesh_off[i] ); 00274 /* correct round off errors */ 00275 if( (mesh[i]/(double)n_grid[i])*(n_pos[i]+1) - mesh_off[i] - last[i] < 1.0e-15 ) last[i]--; 00276 if(1.0+ (mesh[i]/(double)n_grid[i])*n_pos[i]-mesh_off[i]-start[i] < 1.0e-15 ) start[i]--; 00277 loc_mesh[i] = last[i]-start[i]+1; 00278 size *= loc_mesh[i]; 00279 } 00280 return size; 00281 } 00282 00283 00284 int fft_calc_send_block(int pos1[3], int grid1[3], int pos2[3], int grid2[3], 00285 int mesh[3], double mesh_off[3], int block[6]) 00286 { 00287 int i,size=1; 00288 int mesh1[3], first1[3], last1[3]; 00289 int mesh2[3], first2[3], last2[3]; 00290 00291 fft_calc_local_mesh(pos1, grid1, mesh, mesh_off, mesh1, first1); 00292 fft_calc_local_mesh(pos2, grid2, mesh, mesh_off, mesh2, first2); 00293 00294 for(i=0;i<3;i++) { 00295 last1[i] = first1[i] + mesh1[i] -1; 00296 last2[i] = first2[i] + mesh2[i] -1; 00297 block[i ] = imax(first1[i],first2[i]) - first1[i]; 00298 block[i+3] = (imin(last1[i], last2[i] ) - first1[i])-block[i]+1; 00299 size *= block[i+3]; 00300 } 00301 return size; 00302 } 00303 00304 void fft_print_fft_plan(fft_forw_plan pl) 00305 { 00306 int i; 00307 00308 fprintf(stderr,"%d: dir=%d, row_dir=%d, n_permute=%d, n_ffts=%d\n", 00309 this_node, pl.dir, pl.row_dir, pl.n_permute, pl.n_ffts); 00310 00311 fprintf(stderr,"%d: local: old_mesh=(%d,%d,%d), new_mesh=(%d,%d,%d), start=(%d,%d,%d)\n",this_node, 00312 pl.old_mesh[0], pl.old_mesh[1], pl.old_mesh[2], 00313 pl.new_mesh[0], pl.new_mesh[1], pl.new_mesh[2], 00314 pl.start[0], pl.start[1], pl.start[2]); 00315 00316 fprintf(stderr,"%d: g_size=%d group=(",this_node,pl.g_size); 00317 for(i=0;i<pl.g_size-1;i++) fprintf(stderr,"%d,", pl.group[i]); 00318 fprintf(stderr,"%d)\n",pl.group[pl.g_size-1]); 00319 00320 fprintf(stderr,"%d: send=[",this_node); 00321 for(i=0;i<pl.g_size;i++) fprintf(stderr,"(%d,%d,%d)+(%d,%d,%d), ", 00322 pl.send_block[6*i+0], pl.send_block[6*i+1], pl.send_block[6*i+2], 00323 pl.send_block[6*i+3], pl.send_block[6*i+4], pl.send_block[6*i+5]); 00324 fprintf(stderr,"]\n%d: recv=[",this_node); 00325 for(i=0;i<pl.g_size;i++) fprintf(stderr,"(%d,%d,%d)+(%d,%d,%d), ", 00326 pl.recv_block[6*i+0], pl.recv_block[6*i+1], pl.recv_block[6*i+2], 00327 pl.recv_block[6*i+3], pl.recv_block[6*i+4], pl.recv_block[6*i+5]); 00328 fprintf(stderr,"]\n"); 00329 00330 fflush(stderr); 00331 } 00332 00333 void fft_print_global_fft_mesh(fft_forw_plan plan, double *data, int element, int num) 00334 { 00335 int i0,i1,i2,b=1; 00336 int mesh,divide=0,block1=-1,start1; 00337 int st[3],en[3],si[3]; 00338 int my=-1; 00339 double tmp; 00340 00341 for(i1=0;i1<3;i1++) { 00342 st[i1] = plan.start[i1]; 00343 en[i1] = plan.start[i1]+plan.new_mesh[i1]; 00344 si[i1] = plan.new_mesh[i1]; 00345 } 00346 00347 mesh = plan.new_mesh[2]; 00348 MPI_Barrier(comm_cart); 00349 if(this_node==0) fprintf(stderr,"All: Print Global Mesh: (%d of %d elements)\n", 00350 num+1,element); 00351 MPI_Barrier(comm_cart); 00352 for(i0=0;i0<n_nodes;i0++) { 00353 MPI_Barrier(comm_cart); 00354 if(i0==this_node) fprintf(stderr,"%d: range (%d,%d,%d)-(%d,%d,%d)\n",this_node,st[0],st[1],st[2],en[0],en[1],en[2]); 00355 } 00356 MPI_Barrier(comm_cart); 00357 while(divide==0) { 00358 if(b*mesh > 7) { 00359 block1=b; 00360 divide = (int)ceil(mesh/(double)block1); 00361 } 00362 b++; 00363 } 00364 00365 for(b=0;b<divide;b++) { 00366 start1 = b*block1; 00367 for(i0=mesh-1; i0>=0; i0--) { 00368 for(i1=start1; i1<imin(start1+block1,mesh);i1++) { 00369 for(i2=0; i2<mesh;i2++) { 00370 if(i0>=st[0] && i0<en[0] && i1>=st[1] && 00371 i1<en[1] && i2>=st[2] && i2<en[2]) my=1; 00372 else my=0; 00373 MPI_Barrier(comm_cart); 00374 if(my==1) { 00375 00376 tmp=data[num+(element*((i2-st[2])+si[2]*((i1-st[1])+si[1]*(i0-st[0]))))]; 00377 if(fabs(tmp)>1.0e-15) { 00378 if(tmp<0) fprintf(stderr,"%1.2e",tmp); 00379 else fprintf(stderr," %1.2e",tmp); 00380 } 00381 else { 00382 fprintf(stderr," %1.2e",0.0); 00383 } 00384 } 00385 MPI_Barrier(comm_cart); 00386 } 00387 if(my==1) fprintf(stderr," | "); 00388 } 00389 if(my==1) fprintf(stderr,"\n"); 00390 } 00391 if(my==1) fprintf(stderr,"\n"); 00392 } 00393 00394 } 00395 00396 00397 #endif /* defined(P3M) || defined(DP3M) */
1.7.5.1