ESPResSo 3.2.0-159-gf5c8922-git
Extensible Simulation Package for Soft Matter Research
fft-common.c
Go to the documentation of this file.
00001 /*
00002   Copyright (C) 2010,2011,2012,2013 The ESPResSo project
00003   Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
00004     Max-Planck-Institute for Polymer Research, Theory Group
00005   
00006   This file is part of ESPResSo.
00007   
00008   ESPResSo is free software: you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation, either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   ESPResSo is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
00020 */
00021 /** \file fft-common.c
00022  *
00023  *  Routines, row decomposition, data structures and communication for the 3D-FFT. 
00024  *
00025 */
00026 #include "fft-common.h"
00027 
00028 #if defined(P3M) || defined(DP3M)
00029 
00030 #include <string.h>
00031 #include <fftw3.h>
00032 #include <mpi.h>
00033 
00034 #include "utils.h"
00035 #include "communication.h"
00036 
00037 
00038 void fft_common_pre_init(fft_data_struct *fft)
00039 {
00040   for(int i=0;i<4;i++) {
00041     fft->plan[i].group = malloc(1*n_nodes*sizeof(int));
00042     fft->plan[i].send_block = NULL;
00043     fft->plan[i].send_size  = NULL;
00044     fft->plan[i].recv_block = NULL;
00045     fft->plan[i].recv_size  = NULL;
00046   }
00047 
00048   fft->init_tag = 0;
00049   fft->max_comm_size = 0;
00050   fft->max_mesh_size = 0;
00051   fft->send_buf = NULL;
00052   fft->recv_buf = NULL;
00053   fft->data_buf = NULL;
00054 }
00055 
00056 void fft_pack_block(double *in, double *out, int start[3], int size[3], int dim[3], int element)
00057 {
00058   /* mid and slow changing indices */
00059   int m,s;
00060   /* linear index of in grid, linear index of out grid */
00061   int li_in,li_out=0;
00062   /* copy size */
00063   int copy_size;
00064   /* offsets for indizes in input grid */
00065   int m_in_offset,s_in_offset;
00066   /* offsets for indizes in output grid */
00067   int m_out_offset;
00068 
00069   copy_size    = element * size[2] * sizeof(double);
00070   m_in_offset  = element * dim[2];
00071   s_in_offset  = element * (dim[2] * (dim[1] - size[1]));
00072   m_out_offset = element * size[2];
00073   li_in        = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0]));
00074 
00075   for(s=0 ;s<size[0]; s++) {
00076     for(m=0; m<size[1]; m++) {
00077       memcpy(&(out[li_out]), &(in[li_in]), copy_size);
00078       li_in  += m_in_offset;
00079       li_out += m_out_offset;
00080     }
00081     li_in += s_in_offset;
00082   }
00083 }
00084 
00085 void fft_pack_block_permute1(double *in, double *out, int start[3], int size[3], 
00086                          int dim[3], int element)
00087 {
00088   /* slow,mid and fast changing indices for input  grid */
00089   int s,m,f,e;
00090   /* linear index of in grid, linear index of out grid */
00091   int li_in,li_out=0;
00092   /* offsets for indizes in input grid */
00093   int m_in_offset,s_in_offset;
00094   /* offset for mid changing indices of output grid */
00095   int m_out_offset;
00096 
00097   m_in_offset  =  element * (dim[2] - size[2]);
00098   s_in_offset  =  element * (dim[2] * (dim[1] - size[1]));
00099   m_out_offset = (element * size[0]) - element;
00100   li_in        =  element * (start[2]+dim[2]*(start[1]+dim[1]*start[0]));
00101 
00102   for(s=0 ;s<size[0]; s++) {      /* fast changing out */
00103     li_out = element*s;
00104     for(m=0; m<size[1]; m++) {    /* slow changing out */
00105       for(f=0 ;f<size[2]; f++) {  /* mid  changing out */
00106         for(e=0; e<element; e++) out[li_out++] = in[li_in++];
00107         li_out += m_out_offset;
00108       }
00109       li_in  += m_in_offset;
00110     }
00111     li_in += s_in_offset;
00112   }
00113 
00114 }
00115 
00116 void fft_pack_block_permute2(double *in, double *out, int start[3], int size[3], 
00117                          int dim[3],int element)
00118 {
00119   /* slow,mid and fast changing indices for input  grid */
00120   int s,m,f,e;
00121   /* linear index of in grid, linear index of out grid */
00122   int li_in,li_out=0;
00123   /* offsets for indizes in input grid */
00124   int m_in_offset,s_in_offset;
00125   /* offset for slow changing index of output grid */
00126   int s_out_offset;
00127   /* start index for mid changing index of output grid */
00128   int m_out_start;
00129 
00130   m_in_offset  = element * (dim[2]-size[2]);
00131   s_in_offset  = element * (dim[2] * (dim[1]-size[1]));
00132   s_out_offset = (element * size[0] * size[1]) - element;
00133   li_in        = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0]));
00134 
00135   for(s=0 ;s<size[0]; s++) {      /* mid changing out */
00136     m_out_start = element*(s * size[1]);
00137     for(m=0; m<size[1]; m++) {    /* fast changing out */
00138       li_out = m_out_start + element*m;
00139       for(f=0 ;f<size[2]; f++) {  /* slow  changing out */
00140         for(e=0; e<element; e++) out[li_out++] = in[li_in++];
00141         li_out += s_out_offset;
00142       }
00143       li_in += m_in_offset; 
00144     }
00145     li_in += s_in_offset; 
00146   }
00147 
00148 }
00149 
00150 void fft_unpack_block(double *in, double *out, int start[3], int size[3], 
00151                   int dim[3], int element)
00152 {
00153   /* mid and slow changing indices */
00154   int m,s;
00155   /* linear index of in grid, linear index of out grid */
00156   int li_in=0,li_out;
00157   /* copy size */
00158   int copy_size;
00159   /* offset for indizes in input grid */
00160   int m_in_offset;
00161   /* offsets for indizes in output grid */
00162   int m_out_offset,s_out_offset;
00163 
00164   copy_size    = element * (size[2] * sizeof(double));
00165   m_out_offset = element * dim[2];
00166   s_out_offset = element * (dim[2] * (dim[1] - size[1]));
00167   m_in_offset  = element * size[2];
00168   li_out       = element * (start[2]+dim[2]*(start[1]+dim[1]*start[0]));
00169 
00170   for(s=0 ;s<size[0]; s++) {
00171     for(m=0; m<size[1]; m++) {
00172       memcpy(&(out[li_out]), &(in[li_in]), copy_size);
00173       li_in  += m_in_offset;
00174       li_out += m_out_offset;
00175     }
00176     li_out += s_out_offset;
00177   }
00178 
00179 }
00180 
00181 /************************************************
00182  * privat functions
00183  ************************************************/
00184 
00185 int fft_find_comm_groups(int grid1[3], int grid2[3], int *node_list1, int *node_list2, 
00186                      int *group, int *pos, int *my_pos)
00187 {
00188   int i;
00189   /* communication group cell size on grid1 and grid2 */
00190   int s1[3], s2[3];
00191   /* The communication group cells build the same super grid on grid1 and grid2 */
00192   int ds[3];
00193   /* communication group size */
00194   int g_size=1;
00195   /* comm. group cell index */
00196   int gi[3];
00197   /* position of a node in a grid */
00198   int p1[3], p2[3];
00199   /* node identity */
00200   int n;
00201   /* this_node position in the communication group. */
00202   int c_pos=-1;
00203   /* flag for group identification */
00204   int my_group=0;
00205 
00206   FFT_TRACE(fprintf(stderr,"%d: fft_find_comm_groups:\n",this_node));
00207   FFT_TRACE(fprintf(stderr,"%d: for grid1=(%d,%d,%d) and grids=(%d,%d,%d)\n",
00208                     this_node,grid1[0],grid1[1],grid1[2],grid2[0],grid2[1],grid2[2]));
00209 
00210   /* calculate dimension of comm. group cells for both grids */ 
00211   if( (grid1[0]*grid1[1]*grid1[2]) != (grid2[0]*grid2[1]*grid2[2]) ) return -1; /* unlike number of nodes */
00212   for(i=0;i<3;i++) {
00213     s1[i] = grid1[i] / grid2[i];
00214     if(s1[i] == 0) s1[i] = 1;
00215     else if(grid1[i] != grid2[i]*s1[i]) return -1; /* grids do not match!!! */
00216 
00217     s2[i] = grid2[i] / grid1[i];
00218     if(s2[i] == 0) s2[i] = 1;
00219     else if(grid2[i] != grid1[i]*s2[i]) return -1; /* grids do not match!!! */
00220 
00221     ds[i] = grid2[i] / s2[i]; 
00222     g_size *= s2[i];
00223   }
00224 
00225   /* calc node_list2 */
00226   /* loop through all comm. group cells */
00227   for(gi[2] = 0; gi[2] < ds[2]; gi[2]++) 
00228     for(gi[1] = 0; gi[1] < ds[1]; gi[1]++)
00229       for(gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
00230         /* loop through all nodes in that comm. group cell */
00231         for(i=0;i<g_size;i++) {
00232           p1[0] = (gi[0]*s1[0]) + (i%s1[0]);
00233           p1[1] = (gi[1]*s1[1]) + ((i/s1[0])%s1[1]);
00234           p1[2] = (gi[2]*s1[2]) + (i/(s1[0]*s1[1]));
00235 
00236           p2[0] = (gi[0]*s2[0]) + (i%s2[0]);
00237           p2[1] = (gi[1]*s2[1]) + ((i/s2[0])%s2[1]);
00238           p2[2] = (gi[2]*s2[2]) + (i/(s2[0]*s2[1]));
00239 
00240           n = node_list1[ get_linear_index(p1[0],p1[1],p1[2],grid1) ];
00241           node_list2[ get_linear_index(p2[0],p2[1],p2[2],grid2) ] = n ;
00242 
00243           pos[3*n+0] = p2[0];  pos[3*n+1] = p2[1];  pos[3*n+2] = p2[2];   
00244           if(my_group==1) group[i] = n;
00245           if(n==this_node && my_group==0) { 
00246             my_group = 1; 
00247             c_pos = i;
00248             my_pos[0] = p2[0]; my_pos[1] = p2[1]; my_pos[2] = p2[2];
00249             i=-1; /* restart the loop */ 
00250           }
00251         }
00252         my_group=0;
00253       }
00254 
00255   /* permute comm. group according to the nodes position in the group */
00256   /* This is necessary to have matching node pairs during communication! */
00257   while( c_pos>0 ) {
00258     n=group[g_size-1];
00259     for(i=g_size-1; i>0; i--) group[i] = group[i-1];
00260     group[0] = n;
00261     c_pos--;
00262   }
00263   return g_size;
00264 }
00265 
00266 int fft_calc_local_mesh(int n_pos[3], int n_grid[3], int mesh[3], double mesh_off[3], 
00267                      int loc_mesh[3], int start[3])
00268 {
00269   int i, last[3], size=1;
00270   
00271   for(i=0;i<3;i++) {
00272     start[i] = (int)ceil( (mesh[i]/(double)n_grid[i])*n_pos[i]     - mesh_off[i] );
00273     last[i]  = (int)floor((mesh[i]/(double)n_grid[i])*(n_pos[i]+1) - mesh_off[i] );
00274     /* correct round off errors */
00275     if( (mesh[i]/(double)n_grid[i])*(n_pos[i]+1) - mesh_off[i] - last[i] < 1.0e-15 ) last[i]--;
00276     if(1.0+ (mesh[i]/(double)n_grid[i])*n_pos[i]-mesh_off[i]-start[i] < 1.0e-15 ) start[i]--;
00277     loc_mesh[i] = last[i]-start[i]+1;
00278     size *= loc_mesh[i];
00279   }
00280   return size;
00281 }
00282 
00283 
00284 int fft_calc_send_block(int pos1[3], int grid1[3], int pos2[3], int grid2[3], 
00285                     int mesh[3], double mesh_off[3], int block[6])
00286 {
00287   int i,size=1;
00288   int mesh1[3], first1[3], last1[3];
00289   int mesh2[3], first2[3], last2[3];
00290 
00291   fft_calc_local_mesh(pos1, grid1, mesh, mesh_off, mesh1, first1);
00292   fft_calc_local_mesh(pos2, grid2, mesh, mesh_off, mesh2, first2);
00293 
00294   for(i=0;i<3;i++) {
00295     last1[i] = first1[i] + mesh1[i] -1;
00296     last2[i] = first2[i] + mesh2[i] -1;
00297     block[i  ] = imax(first1[i],first2[i]) - first1[i];
00298     block[i+3] = (imin(last1[i], last2[i] ) - first1[i])-block[i]+1;
00299     size *= block[i+3];
00300   }
00301   return size;
00302 }
00303 
00304 void fft_print_fft_plan(fft_forw_plan pl)
00305 {
00306   int i;
00307 
00308   fprintf(stderr,"%d: dir=%d, row_dir=%d, n_permute=%d, n_ffts=%d\n",
00309           this_node, pl.dir,  pl.row_dir, pl.n_permute, pl.n_ffts);
00310 
00311   fprintf(stderr,"%d:    local: old_mesh=(%d,%d,%d), new_mesh=(%d,%d,%d), start=(%d,%d,%d)\n",this_node,
00312           pl.old_mesh[0],  pl.old_mesh[1],  pl.old_mesh[2], 
00313           pl.new_mesh[0],  pl.new_mesh[1],  pl.new_mesh[2], 
00314           pl.start[0], pl.start[1],  pl.start[2]);
00315 
00316   fprintf(stderr,"%d:    g_size=%d group=(",this_node,pl.g_size);
00317   for(i=0;i<pl.g_size-1;i++) fprintf(stderr,"%d,", pl.group[i]);
00318   fprintf(stderr,"%d)\n",pl.group[pl.g_size-1]);
00319 
00320   fprintf(stderr,"%d:    send=[",this_node);
00321   for(i=0;i<pl.g_size;i++) fprintf(stderr,"(%d,%d,%d)+(%d,%d,%d), ",
00322                                    pl.send_block[6*i+0], pl.send_block[6*i+1], pl.send_block[6*i+2],
00323                                    pl.send_block[6*i+3], pl.send_block[6*i+4], pl.send_block[6*i+5]);
00324   fprintf(stderr,"]\n%d:    recv=[",this_node);
00325   for(i=0;i<pl.g_size;i++) fprintf(stderr,"(%d,%d,%d)+(%d,%d,%d), ",
00326                                    pl.recv_block[6*i+0], pl.recv_block[6*i+1], pl.recv_block[6*i+2],
00327                                    pl.recv_block[6*i+3], pl.recv_block[6*i+4], pl.recv_block[6*i+5]);
00328   fprintf(stderr,"]\n");
00329  
00330  fflush(stderr);
00331 }
00332 
00333 void fft_print_global_fft_mesh(fft_forw_plan plan, double *data, int element, int num)
00334 {
00335   int i0,i1,i2,b=1;
00336   int mesh,divide=0,block1=-1,start1;
00337   int st[3],en[3],si[3];
00338   int my=-1;
00339   double tmp;
00340 
00341   for(i1=0;i1<3;i1++) {
00342     st[i1] = plan.start[i1];
00343     en[i1] = plan.start[i1]+plan.new_mesh[i1];
00344     si[i1] = plan.new_mesh[i1];
00345   }
00346 
00347   mesh = plan.new_mesh[2];
00348   MPI_Barrier(comm_cart);  
00349   if(this_node==0) fprintf(stderr,"All: Print Global Mesh: (%d of %d elements)\n",
00350                            num+1,element);
00351   MPI_Barrier(comm_cart);
00352   for(i0=0;i0<n_nodes;i0++) {
00353     MPI_Barrier(comm_cart);
00354     if(i0==this_node) fprintf(stderr,"%d: range (%d,%d,%d)-(%d,%d,%d)\n",this_node,st[0],st[1],st[2],en[0],en[1],en[2]);
00355   }
00356   MPI_Barrier(comm_cart);
00357   while(divide==0) {
00358     if(b*mesh > 7) {
00359       block1=b;
00360       divide = (int)ceil(mesh/(double)block1);
00361     }
00362     b++;
00363   }
00364 
00365   for(b=0;b<divide;b++) {
00366     start1 = b*block1;
00367     for(i0=mesh-1; i0>=0; i0--) {
00368       for(i1=start1; i1<imin(start1+block1,mesh);i1++) {
00369         for(i2=0; i2<mesh;i2++) {
00370           if(i0>=st[0] && i0<en[0] && i1>=st[1] && 
00371              i1<en[1] && i2>=st[2] && i2<en[2]) my=1;
00372           else my=0;
00373           MPI_Barrier(comm_cart);
00374           if(my==1) {
00375            
00376             tmp=data[num+(element*((i2-st[2])+si[2]*((i1-st[1])+si[1]*(i0-st[0]))))];
00377             if(fabs(tmp)>1.0e-15) {
00378               if(tmp<0) fprintf(stderr,"%1.2e",tmp);
00379               else      fprintf(stderr," %1.2e",tmp);
00380             }
00381             else {
00382               fprintf(stderr," %1.2e",0.0);
00383             }
00384           }
00385           MPI_Barrier(comm_cart);
00386         }
00387         if(my==1) fprintf(stderr," | ");
00388       }
00389       if(my==1) fprintf(stderr,"\n");
00390     }
00391     if(my==1) fprintf(stderr,"\n");
00392   }
00393 
00394 }
00395 
00396 
00397 #endif /* defined(P3M) || defined(DP3M) */