ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
fft-dipolar.c
Go to the documentation of this file.
00001 /*
00002   Copyright (C) 2010,2011,2012,2013 The ESPResSo project
00003   Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
00004     Max-Planck-Institute for Polymer Research, Theory Group
00005   
00006   This file is part of ESPResSo.
00007   
00008   ESPResSo is free software: you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation, either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   ESPResSo is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
00020 */
00021 /** \file fft.c
00022  *
00023  *  Routines, row decomposition, data structures and communication for the 3D-FFT. 
00024  *
00025 */
00026 
00027 
00028 #include "fft-dipolar.h"
00029 
00030 #ifdef DP3M
00031 
00032 #include <fftw3.h>
00033 /* our remapping of malloc interferes with fftw3's name mangling. */
00034 void *fftw_malloc(size_t n);
00035 
00036 #include <mpi.h>
00037 #include "communication.h"
00038 #include "grid.h"
00039 #include "fft-common.h"
00040 
00041 /************************************************
00042  * variables
00043  ************************************************/
00044 
00045 fft_data_struct dfft;
00046 
00047 /** communicate the grid data according to the given fft_forw_plan. 
00048  * \param plan communication plan (see \ref fft_forw_plan).
00049  * \param in   input mesh.
00050  * \param out  output mesh.
00051 */
00052 void dfft_forw_grid_comm(fft_forw_plan plan, double *in, double *out);
00053 
00054 /** communicate the grid data according to the given fft_forw_plan/fft_bakc_plan. 
00055  * \param plan_f communication plan (see \ref fft_forw_plan).
00056  * \param plan_b additional back plan (see \ref fft_back_plan).
00057  * \param in     input mesh.
00058  * \param out    output mesh.
00059 */
00060 void dfft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, double *out);
00061 
00062 
00063 void dfft_pre_init()
00064 {
00065   fft_common_pre_init(&dfft);
00066 }
00067 
00068 int dfft_init(double **data, 
00069               int *local_mesh_dim, int *local_mesh_margin, 
00070               int* global_mesh_dim, double *global_mesh_off,
00071               int *ks_pnum)
00072 {
00073   int i,j;
00074   /* helpers */
00075   int mult[3];
00076 
00077   int n_grid[4][3]; /* The four node grids. */
00078   int my_pos[4][3]; /* The position of this_node in the node grids. */
00079   int *n_id[4];     /* linear node identity lists for the node grids. */
00080   int *n_pos[4];    /* positions of nodes in the node grids. */
00081   /* FFTW WISDOM stuff. */
00082   char wisdom_file_name[255];
00083   FILE *wisdom_file;
00084   int wisdom_status;
00085 
00086   FFT_TRACE(fprintf(stderr,"%d: dipolar dfft_init():\n",this_node));
00087 
00088 
00089   dfft.max_comm_size=0; dfft.max_mesh_size=0;
00090   for(i=0;i<4;i++) {
00091     n_id[i]  = malloc(1*n_nodes*sizeof(int));
00092     n_pos[i] = malloc(3*n_nodes*sizeof(int));
00093   }
00094 
00095   /* === node grids === */
00096   /* real space node grid (n_grid[0]) */
00097   for(i=0;i<3;i++) {
00098     n_grid[0][i] = node_grid[i];
00099     my_pos[0][i] = node_pos[i];
00100   }
00101   for(i=0;i<n_nodes;i++) {
00102     map_node_array(i,&(n_pos[0][3*i+0]));
00103     n_id[0][get_linear_index( n_pos[0][3*i+0],n_pos[0][3*i+1],n_pos[0][3*i+2], n_grid[0])] = i;
00104   }
00105     
00106   /* FFT node grids (n_grid[1 - 3]) */
00107   calc_2d_grid(n_nodes,n_grid[1]);
00108   /* resort n_grid[1] dimensions if necessary */
00109   dfft.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1], mult);
00110   dfft.plan[0].n_permute = 0;
00111   for(i=1;i<4;i++) dfft.plan[i].n_permute = (dfft.plan[1].row_dir+i)%3;
00112   for(i=0;i<3;i++) {
00113     n_grid[2][i] = n_grid[1][(i+1)%3];
00114     n_grid[3][i] = n_grid[1][(i+2)%3];
00115   }
00116   dfft.plan[2].row_dir = (dfft.plan[1].row_dir-1)%3;
00117   dfft.plan[3].row_dir = (dfft.plan[1].row_dir-2)%3;
00118 
00119 
00120 
00121   /* === communication groups === */
00122   /* copy local mesh off real space charge assignment grid */
00123   for(i=0;i<3;i++) dfft.plan[0].new_mesh[i] = local_mesh_dim[i];
00124   for(i=1; i<4;i++) {
00125     dfft.plan[i].g_size=fft_find_comm_groups(n_grid[i-1], n_grid[i], n_id[i-1], n_id[i], 
00126                                         dfft.plan[i].group, n_pos[i], my_pos[i]);
00127     if(dfft.plan[i].g_size==-1) {
00128       /* try permutation */
00129       j = n_grid[i][(dfft.plan[i].row_dir+1)%3];
00130       n_grid[i][(dfft.plan[i].row_dir+1)%3] = n_grid[i][(dfft.plan[i].row_dir+2)%3];
00131       n_grid[i][(dfft.plan[i].row_dir+2)%3] = j;
00132       dfft.plan[i].g_size=fft_find_comm_groups(n_grid[i-1], n_grid[i], n_id[i-1], n_id[i], 
00133                                           dfft.plan[i].group, n_pos[i], my_pos[i]);
00134       if(dfft.plan[i].g_size==-1) {
00135         fprintf(stderr,"%d: dipolar INTERNAL ERROR: fft_find_comm_groups error\n", this_node);
00136         errexit();
00137       }
00138     }
00139 
00140     dfft.plan[i].send_block = (int *)realloc(dfft.plan[i].send_block, 6*dfft.plan[i].g_size*sizeof(int));
00141     dfft.plan[i].send_size  = (int *)realloc(dfft.plan[i].send_size, 1*dfft.plan[i].g_size*sizeof(int));
00142     dfft.plan[i].recv_block = (int *)realloc(dfft.plan[i].recv_block, 6*dfft.plan[i].g_size*sizeof(int));
00143     dfft.plan[i].recv_size  = (int *)realloc(dfft.plan[i].recv_size, 1*dfft.plan[i].g_size*sizeof(int));
00144 
00145     dfft.plan[i].new_size = fft_calc_local_mesh(my_pos[i], n_grid[i], global_mesh_dim,
00146                                            global_mesh_off, dfft.plan[i].new_mesh, 
00147                                            dfft.plan[i].start);  
00148     permute_ifield(dfft.plan[i].new_mesh,3,-(dfft.plan[i].n_permute));
00149     permute_ifield(dfft.plan[i].start,3,-(dfft.plan[i].n_permute));
00150     dfft.plan[i].n_ffts = dfft.plan[i].new_mesh[0]*dfft.plan[i].new_mesh[1];
00151 
00152     /* === send/recv block specifications === */
00153     for(j=0; j<dfft.plan[i].g_size; j++) {
00154       int k, node;
00155       /* send block: this_node to comm-group-node i (identity: node) */
00156       node = dfft.plan[i].group[j];
00157       dfft.plan[i].send_size[j] 
00158         = fft_calc_send_block(my_pos[i-1], n_grid[i-1], &(n_pos[i][3*node]), n_grid[i],
00159                               global_mesh_dim, global_mesh_off, &(dfft.plan[i].send_block[6*j]));
00160       permute_ifield(&(dfft.plan[i].send_block[6*j]),3,-(dfft.plan[i-1].n_permute));
00161       permute_ifield(&(dfft.plan[i].send_block[6*j+3]),3,-(dfft.plan[i-1].n_permute));
00162       if(dfft.plan[i].send_size[j] > dfft.max_comm_size) 
00163         dfft.max_comm_size = dfft.plan[i].send_size[j];
00164       /* First plan send blocks have to be adjusted, since the CA grid
00165          may have an additional margin outside the actual domain of the
00166          node */
00167       if(i==1) {
00168         for(k=0;k<3;k++) 
00169           dfft.plan[1].send_block[6*j+k  ] += local_mesh_margin[2*k];
00170       }
00171       /* recv block: this_node from comm-group-node i (identity: node) */
00172       dfft.plan[i].recv_size[j] 
00173         = fft_calc_send_block(my_pos[i], n_grid[i], &(n_pos[i-1][3*node]), n_grid[i-1],
00174                               global_mesh_dim, global_mesh_off,&(dfft.plan[i].recv_block[6*j]));
00175       permute_ifield(&(dfft.plan[i].recv_block[6*j]),3,-(dfft.plan[i].n_permute));
00176       permute_ifield(&(dfft.plan[i].recv_block[6*j+3]),3,-(dfft.plan[i].n_permute));
00177       if(dfft.plan[i].recv_size[j] > dfft.max_comm_size) 
00178         dfft.max_comm_size = dfft.plan[i].recv_size[j];
00179     }
00180 
00181     for(j=0;j<3;j++) dfft.plan[i].old_mesh[j] = dfft.plan[i-1].new_mesh[j];
00182     if(i==1) 
00183       dfft.plan[i].element = 1; 
00184     else {
00185       dfft.plan[i].element = 2;
00186       for(j=0; j<dfft.plan[i].g_size; j++) {
00187         dfft.plan[i].send_size[j] *= 2;
00188         dfft.plan[i].recv_size[j] *= 2;
00189       }
00190     }
00191     /* DEBUG */
00192     for(j=0;j<n_nodes;j++) {
00193       /* MPI_Barrier(comm_cart); */
00194       if(j==this_node) FFT_TRACE(fft_print_fft_plan(dfft.plan[i]));
00195     }
00196   }
00197 
00198   /* Factor 2 for complex fields */
00199   dfft.max_comm_size *= 2;
00200   dfft.max_mesh_size = (local_mesh_dim[0]*local_mesh_dim[1]*local_mesh_dim[2]);
00201   for(i=1;i<4;i++) 
00202     if(2*dfft.plan[i].new_size > dfft.max_mesh_size) dfft.max_mesh_size = 2*dfft.plan[i].new_size;
00203 
00204   FFT_TRACE(fprintf(stderr,"%d: dfft.max_comm_size = %d, dfft.max_mesh_size = %d\n",
00205                     this_node,dfft.max_comm_size,dfft.max_mesh_size));
00206 
00207   /* === pack function === */
00208   for(i=1;i<4;i++) {
00209     dfft.plan[i].pack_function = fft_pack_block_permute2; 
00210     FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 2 \n",this_node,i));
00211   }
00212   (*ks_pnum)=6;
00213   if(dfft.plan[1].row_dir==2) {
00214     dfft.plan[1].pack_function = fft_pack_block;
00215     FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 0 \n",this_node,1));
00216     (*ks_pnum)=4;
00217   }
00218   else if(dfft.plan[1].row_dir==1) {
00219     dfft.plan[1].pack_function = fft_pack_block_permute1;
00220     FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 1 \n",this_node,1));
00221     (*ks_pnum)=5;
00222   }
00223   
00224   /* Factor 2 for complex numbers */
00225   dfft.send_buf = (double *)realloc(dfft.send_buf, dfft.max_comm_size*sizeof(double));
00226   dfft.recv_buf = (double *)realloc(dfft.recv_buf, dfft.max_comm_size*sizeof(double));
00227   (*data)  = (double *)realloc((*data), dfft.max_mesh_size*sizeof(double));
00228   dfft.data_buf = (double *)realloc(dfft.data_buf, dfft.max_mesh_size*sizeof(double));
00229   if(!(*data) || !dfft.data_buf || !dfft.recv_buf || !dfft.send_buf) {
00230     fprintf(stderr,"%d: Could not allocate FFT data arays\n",this_node);
00231     errexit();
00232   }
00233 
00234   fftw_complex *c_data     = (fftw_complex *) (*data);
00235 
00236   /* === FFT Routines (Using FFTW / RFFTW package)=== */
00237   for(i=1;i<4;i++) {
00238     dfft.plan[i].dir = FFTW_FORWARD;   
00239     /* FFT plan creation. 
00240        Attention: destroys contents of c_data/data and c_data_buf/data_buf. */
00241     wisdom_status   = FFTW_FAILURE;
00242     sprintf(wisdom_file_name,"dfftw3_1d_wisdom_forw_n%d.file",
00243             dfft.plan[i].new_mesh[2]);
00244     if( (wisdom_file=fopen(wisdom_file_name,"r"))!=NULL ) {
00245       wisdom_status = fftw_import_wisdom_from_file(wisdom_file);
00246       fclose(wisdom_file);
00247     }
00248     if(dfft.init_tag==1) fftw_destroy_plan(dfft.plan[i].fftw_plan);
00249 //printf("dfft.plan[%d].n_ffts=%d\n",i,dfft.plan[i].n_ffts);
00250     dfft.plan[i].fftw_plan =
00251       fftw_plan_many_dft(1,&dfft.plan[i].new_mesh[2],dfft.plan[i].n_ffts,
00252                          c_data,NULL,1,dfft.plan[i].new_mesh[2],
00253                          c_data,NULL,1,dfft.plan[i].new_mesh[2],
00254                          dfft.plan[i].dir,FFTW_PATIENT);
00255     if( wisdom_status == FFTW_FAILURE && 
00256         (wisdom_file=fopen(wisdom_file_name,"w"))!=NULL ) {
00257       fftw_export_wisdom_to_file(wisdom_file);
00258       fclose(wisdom_file);
00259     }
00260     dfft.plan[i].fft_function = fftw_execute;        
00261   }
00262 
00263   /* === The BACK Direction === */
00264   /* this is needed because slightly different functions are used */
00265   for(i=1;i<4;i++) {
00266     dfft.back[i].dir = FFTW_BACKWARD;
00267     wisdom_status   = FFTW_FAILURE;
00268     sprintf(wisdom_file_name,"dfftw3_1d_wisdom_back_n%d.file",
00269             dfft.plan[i].new_mesh[2]);
00270     if( (wisdom_file=fopen(wisdom_file_name,"r"))!=NULL ) {
00271       wisdom_status = fftw_import_wisdom_from_file(wisdom_file);
00272       fclose(wisdom_file);
00273     }    
00274     if(dfft.init_tag==1) fftw_destroy_plan(dfft.back[i].fftw_plan);
00275     dfft.back[i].fftw_plan =
00276       fftw_plan_many_dft(1,&dfft.plan[i].new_mesh[2],dfft.plan[i].n_ffts,
00277                          c_data,NULL,1,dfft.plan[i].new_mesh[2],
00278                          c_data,NULL,1,dfft.plan[i].new_mesh[2],
00279                          dfft.back[i].dir,FFTW_PATIENT);
00280     if( wisdom_status == FFTW_FAILURE && 
00281         (wisdom_file=fopen(wisdom_file_name,"w"))!=NULL ) {
00282       fftw_export_wisdom_to_file(wisdom_file);
00283       fclose(wisdom_file);
00284     }
00285     dfft.back[i].fft_function = fftw_execute;
00286     dfft.back[i].pack_function = fft_pack_block_permute1;
00287     FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 1 \n",this_node,i));
00288   }
00289   if(dfft.plan[1].row_dir==2) {
00290     dfft.back[1].pack_function = fft_pack_block;
00291     FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 0 \n",this_node,1));
00292   }
00293   else if(dfft.plan[1].row_dir==1) {
00294     dfft.back[1].pack_function = fft_pack_block_permute2;
00295     FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 2 \n",this_node,1));
00296   }
00297   dfft.init_tag=1;
00298   /* free(data); */
00299   for(i=0;i<4;i++) { free(n_id[i]); free(n_pos[i]); }
00300   return dfft.max_mesh_size; 
00301 }
00302 
00303 
00304 void dfft_perform_forw(double *data)
00305 {
00306   int i;
00307   /* int m,n,o; */
00308   /* ===== first direction  ===== */
00309   FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_forw: dir 1:\n",this_node));
00310 
00311   fftw_complex *c_data     = (fftw_complex *) data;
00312   fftw_complex *c_data_buf = (fftw_complex *) dfft.data_buf;
00313 
00314   /* communication to current dir row format (in is data) */
00315   dfft_forw_grid_comm(dfft.plan[1], data, dfft.data_buf);
00316 
00317 
00318   /*
00319     fprintf(stderr,"%d: start grid \n",this_node);
00320     i=0;
00321     for(m=0;m<8;m++) {
00322     for(n=0;n<8;n++) {
00323     for(o=0;o<8;o++) {
00324     fprintf(stderr,"%.3f ",data_buf[i++]);
00325     }
00326     fprintf(stderr,"\n");
00327     }
00328     fprintf(stderr,"\n");
00329     }
00330   */
00331 
00332   /* complexify the real data array (in is data_buf) */
00333   for(i=0;i<dfft.plan[1].new_size;i++) {
00334     data[2*i]     = dfft.data_buf[i];     /* real value */
00335     data[(2*i)+1] = 0;       /* complex value */
00336   }
00337   /* perform FFT (in/out is data)*/
00338   fftw_execute_dft(dfft.plan[1].fftw_plan,c_data,c_data);
00339   /* ===== second direction ===== */
00340   FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_forw: dir 2:\n",this_node));
00341   /* communication to current dir row format (in is data) */
00342   dfft_forw_grid_comm(dfft.plan[2], data, dfft.data_buf);
00343   /* perform FFT (in/out is data_buf)*/
00344   fftw_execute_dft(dfft.plan[2].fftw_plan,c_data_buf,c_data_buf);
00345   /* ===== third direction  ===== */
00346   FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_forw: dir 3:\n",this_node));
00347   /* communication to current dir row format (in is data_buf) */
00348   dfft_forw_grid_comm(dfft.plan[3], dfft.data_buf, data);
00349   /* perform FFT (in/out is data)*/
00350   fftw_execute_dft(dfft.plan[3].fftw_plan,c_data,c_data);
00351   //fft_print_global_fft_mesh(dfft.plan[3],data,1,0);
00352 
00353   /* REMARK: Result has to be in data. */
00354 }
00355 
00356 
00357 void dfft_perform_back(double *data)
00358 {
00359   int i;
00360 
00361   fftw_complex *c_data     = (fftw_complex *) data;
00362   fftw_complex *c_data_buf = (fftw_complex *) dfft.data_buf;
00363   
00364   /* ===== third direction  ===== */
00365   FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_back: dir 3:\n",this_node));
00366 
00367 
00368   /* perform FFT (in is data) */
00369   fftw_execute_dft(dfft.back[3].fftw_plan,c_data,c_data);
00370   /* communicate (in is data)*/
00371   dfft_back_grid_comm(dfft.plan[3],dfft.back[3],data,dfft.data_buf);
00372  
00373   /* ===== second direction ===== */
00374   FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_back: dir 2:\n",this_node));
00375   /* perform FFT (in is data_buf) */
00376   fftw_execute_dft(dfft.back[2].fftw_plan,c_data_buf,c_data_buf);
00377   /* communicate (in is data_buf) */
00378   dfft_back_grid_comm(dfft.plan[2],dfft.back[2],dfft.data_buf,data);
00379 
00380   /* ===== first direction  ===== */
00381   FFT_TRACE(fprintf(stderr,"%d: fft_perform_back: dir 1:\n",this_node));
00382   /* perform FFT (in is data) */
00383   fftw_execute_dft(dfft.back[1].fftw_plan,c_data,c_data);
00384   /* throw away the (hopefully) empty complex component (in is data)*/
00385   for(i=0;i<dfft.plan[1].new_size;i++) {
00386     dfft.data_buf[i] = data[2*i]; /* real value */
00387     //Vincent:
00388     if (data[2*i+1]>1e-5) {
00389       printf("dipoar fft - Complex value is not zero (i=%d,data=%g)!!!\n",i,data[2*i+1]);
00390       if (i>100) exit(-1);
00391     }
00392   }
00393   /* communicate (in is data_buf) */
00394   dfft_back_grid_comm(dfft.plan[1],dfft.back[1],dfft.data_buf,data);
00395 
00396 
00397   /* REMARK: Result has to be in data. */
00398 }
00399 
00400 void dfft_forw_grid_comm(fft_forw_plan plan, double *in, double *out)
00401 {
00402   int i;
00403   MPI_Status status;
00404   double *tmp_ptr;
00405 
00406   for(i=0;i<plan.g_size;i++) {   
00407     plan.pack_function(in, dfft.send_buf, &(plan.send_block[6*i]), 
00408                        &(plan.send_block[6*i+3]), plan.old_mesh, plan.element);
00409 
00410     if(plan.group[i]<this_node) {       /* send first, receive second */
00411       MPI_Send(dfft.send_buf, plan.send_size[i], MPI_DOUBLE, 
00412                plan.group[i], REQ_FFT_FORW, comm_cart);
00413       MPI_Recv(dfft.recv_buf, plan.recv_size[i], MPI_DOUBLE, 
00414                plan.group[i], REQ_FFT_FORW, comm_cart, &status);        
00415     }
00416     else if(plan.group[i]>this_node) {  /* receive first, send second */
00417       MPI_Recv(dfft.recv_buf, plan.recv_size[i], MPI_DOUBLE, 
00418                plan.group[i], REQ_FFT_FORW, comm_cart, &status);        
00419       MPI_Send(dfft.send_buf, plan.send_size[i], MPI_DOUBLE, 
00420                plan.group[i], REQ_FFT_FORW, comm_cart);      
00421     }
00422     else {                              /* Self communication... */   
00423       tmp_ptr  = dfft.send_buf;
00424       dfft.send_buf = dfft.recv_buf;
00425       dfft.recv_buf = tmp_ptr;
00426     }
00427     fft_unpack_block(dfft.recv_buf, out, &(plan.recv_block[6*i]), 
00428                  &(plan.recv_block[6*i+3]), plan.new_mesh, plan.element);
00429   }
00430 }
00431 
00432 void dfft_back_grid_comm(fft_forw_plan plan_f,  fft_back_plan plan_b, double *in, double *out)
00433 {
00434   int i;
00435   MPI_Status status;
00436   double *tmp_ptr;
00437 
00438   /* Back means: Use the send/recieve stuff from the forward plan but
00439      replace the recieve blocks by the send blocks and vice
00440      versa. Attention then also new_mesh and old_mesh are exchanged */
00441 
00442   for(i=0;i<plan_f.g_size;i++) {
00443     
00444     plan_b.pack_function(in, dfft.send_buf, &(plan_f.recv_block[6*i]), 
00445                        &(plan_f.recv_block[6*i+3]), plan_f.new_mesh, plan_f.element);
00446 
00447     if(plan_f.group[i]<this_node) {       /* send first, receive second */
00448       MPI_Send(dfft.send_buf, plan_f.recv_size[i], MPI_DOUBLE, 
00449                plan_f.group[i], REQ_FFT_BACK, comm_cart);
00450       MPI_Recv(dfft.recv_buf, plan_f.send_size[i], MPI_DOUBLE, 
00451                plan_f.group[i], REQ_FFT_BACK, comm_cart, &status);      
00452     }
00453     else if(plan_f.group[i]>this_node) {  /* receive first, send second */
00454       MPI_Recv(dfft.recv_buf, plan_f.send_size[i], MPI_DOUBLE, 
00455                plan_f.group[i], REQ_FFT_BACK, comm_cart, &status);      
00456       MPI_Send(dfft.send_buf, plan_f.recv_size[i], MPI_DOUBLE, 
00457                plan_f.group[i], REQ_FFT_BACK, comm_cart);      
00458     }
00459     else {                                /* Self communication... */   
00460       tmp_ptr  = dfft.send_buf;
00461       dfft.send_buf = dfft.recv_buf;
00462       dfft.recv_buf = tmp_ptr;
00463     }
00464     fft_unpack_block(dfft.recv_buf, out, &(plan_f.send_block[6*i]), 
00465                  &(plan_f.send_block[6*i+3]), plan_f.old_mesh, plan_f.element);
00466   }
00467 }
00468 
00469 
00470 #endif /* DP3M */