ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
lbgpu_cfile.c
Go to the documentation of this file.
00001 /* 
00002    Copyright (C) 2010,2011,2012,2013 The ESPResSo project
00003 
00004    This file is part of ESPResSo.
00005   
00006    ESPResSo is free software: you can redistribute it and/or modify
00007    it under the terms of the GNU General Public License as published by
00008    the Free Software Foundation, either version 3 of the License, or
00009    (at your option) any later version.
00010    
00011    ESPResSo is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014    GNU General Public License for more details.
00015    
00016    You should have received a copy of the GNU General Public License
00017    along with this program.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020 /** \file lbgpu_cfile.c
00021  *
00022  * C file for the Lattice Boltzmann implementation on GPUs.
00023  * Header file for \ref lbgpu.h.
00024  */
00025 #include <mpi.h>
00026 #include <stdio.h>
00027 #include <math.h>
00028 #include <stdlib.h>
00029 #include <time.h>
00030 #include <string.h>
00031 #include "lbgpu.h"
00032 #include "utils.h"
00033 #include "communication.h"
00034 #include "thermostat.h"
00035 #include "grid.h"
00036 #include "domain_decomposition.h"
00037 #include "integrate.h"
00038 #include "interaction_data.h"
00039 #include "particle_data.h"
00040 #include "global.h"
00041 #include "lb-boundaries.h"
00042 #ifdef LB_GPU
00043 
00044 /** Action number for \ref mpi_get_particles. */
00045 #define REQ_GETPARTS  16
00046 #ifndef D3Q19
00047 #error The implementation only works for D3Q19 so far!
00048 #endif
00049 
00050 /** Struct holding the Lattice Boltzmann parameters */
00051 LB_parameters_gpu lbpar_gpu = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 1.0 ,0.0, -1.0, 0, 0, 0, 0, 0, 0, 1, 0, {0.0, 0.0, 0.0}, 12345, 0};
00052 LB_values_gpu *host_values = NULL;
00053 LB_nodes_gpu *host_nodes = NULL;
00054 LB_particle_force_gpu *host_forces = NULL;
00055 LB_particle_gpu *host_data = NULL;
00056 
00057 /** Flag indicating momentum exchange between particles and fluid */
00058 int transfer_momentum_gpu = 0;
00059 
00060 static int max_ran = 1000000;
00061 /*@}*/
00062 //static double tau;
00063 
00064 /** measures the MD time since the last fluid update */
00065 static int fluidstep = 0;
00066 
00067 /** c_sound_square in LB units*/
00068 static float c_sound_sq = 1.f/3.f;
00069 
00070 //clock_t start, end;
00071 int i;
00072 
00073 static void mpi_get_particles_lb(LB_particle_gpu *host_result);
00074 static void mpi_get_particles_slave_lb();
00075 static void mpi_send_forces_lb(LB_particle_force_gpu *host_forces);
00076 static void mpi_send_forces_slave_lb();
00077 
00078 int n_extern_nodeforces = 0;
00079 LB_extern_nodeforce_gpu *host_extern_nodeforces = NULL;
00080 
00081 /*-----------------------------------------------------------*/
00082 /** main of lb_gpu_programm */
00083 /*-----------------------------------------------------------*/
00084 /** lattice boltzmann update gpu called from integrate.c
00085 */
00086 void lattice_boltzmann_update_gpu() {
00087 
00088   int factor = (int)round(lbpar_gpu.tau/time_step);
00089 
00090   fluidstep += 1;
00091 
00092   if (fluidstep>=factor) {
00093     fluidstep=0;
00094 
00095     lb_integrate_GPU();
00096 
00097     LB_TRACE (fprintf(stderr,"lb_integrate_GPU \n"));
00098 
00099   }
00100 }
00101 
00102 /** Calculate particle lattice interactions called from forces.c
00103 */
00104 void lb_calc_particle_lattice_ia_gpu() {
00105 
00106   if (transfer_momentum_gpu) {
00107     mpi_get_particles_lb(host_data);
00108 
00109     if(this_node == 0){
00110 #if 0
00111       for (i=0;i<n_total_particles;i++) {
00112         fprintf(stderr, "%i particle posi: , %f %f %f\n", i, host_data[i].p[0], host_data[i].p[1], host_data[i].p[2]);
00113       }
00114 #endif
00115 
00116     if(lbpar_gpu.number_of_particles) lb_particle_GPU(host_data);
00117 
00118     LB_TRACE (fprintf(stderr,"lb_calc_particle_lattice_ia_gpu \n"));
00119 
00120     }
00121   }
00122 }
00123 
00124 /**copy forces from gpu to cpu and call mpi routines to add forces to particles
00125 */
00126 void lb_send_forces_gpu(){
00127 
00128   if (transfer_momentum_gpu) {
00129     if(this_node == 0){
00130       if (lbpar_gpu.number_of_particles) lb_copy_forces_GPU(host_forces);
00131 
00132       LB_TRACE (fprintf(stderr,"lb_send_forces_gpu \n"));
00133 #if 0
00134         for (i=0;i<n_total_particles;i++) {
00135           fprintf(stderr, "%i particle forces , %f %f %f \n", i, host_forces[i].f[0], host_forces[i].f[1], host_forces[i].f[2]);
00136         }
00137 #endif
00138     }
00139     mpi_send_forces_lb(host_forces);
00140   }
00141 }
00142 
00143 /** (re-) allocation of the memory need for the particles (cpu part)*/
00144 void lb_realloc_particles_gpu(){
00145 
00146   lbpar_gpu.number_of_particles = n_total_particles;
00147   LB_TRACE (printf("#particles realloc\t %u \n", lbpar_gpu.number_of_particles));
00148   //fprintf(stderr, "%u \t \n", lbpar_gpu.number_of_particles);
00149   /**-----------------------------------------------------*/
00150   /** allocating of the needed memory for several structs */
00151   /**-----------------------------------------------------*/
00152   /**Allocate struct for particle forces */
00153   size_t size_of_forces = lbpar_gpu.number_of_particles * sizeof(LB_particle_force_gpu);
00154   host_forces = realloc(host_forces, size_of_forces);
00155 
00156   lbpar_gpu.your_seed = (unsigned int)i_random(max_ran);
00157 
00158   LB_TRACE (fprintf(stderr,"test your_seed %u \n", lbpar_gpu.your_seed));
00159   lb_realloc_particle_GPU(&lbpar_gpu, &host_data);
00160 }
00161 /** (Re-)initializes the fluid according to the given value of rho. */
00162 void lb_reinit_fluid_gpu() {
00163 
00164   //lbpar_gpu.your_seed = (unsigned int)i_random(max_ran);
00165   lb_reinit_parameters_gpu();
00166   if(lbpar_gpu.number_of_nodes != 0){
00167     lb_reinit_GPU(&lbpar_gpu);
00168     lbpar_gpu.reinit = 1;
00169   }
00170 
00171   LB_TRACE (fprintf(stderr,"lb_reinit_fluid_gpu \n"));
00172 }
00173 
00174 /** Release the fluid. */
00175 /*not needed in Espresso but still not deleted*/
00176 void lb_release_gpu(){
00177 
00178   free(host_nodes);
00179   free(host_values);
00180   free(host_forces);
00181   free(host_data);
00182 }
00183 /** (Re-)initializes the fluid. */
00184 void lb_reinit_parameters_gpu() {
00185 
00186   lbpar_gpu.mu = 0.0;
00187   lbpar_gpu.time_step = (float)time_step;
00188 
00189   if (lbpar_gpu.viscosity > 0.0) {
00190     /* Eq. (80) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */
00191     lbpar_gpu.gamma_shear = 1. - 2./(6.*lbpar_gpu.viscosity*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.);   
00192   }
00193 
00194   if (lbpar_gpu.bulk_viscosity > 0.0) {
00195     /* Eq. (81) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */
00196     lbpar_gpu.gamma_bulk = 1. - 2./(9.*lbpar_gpu.bulk_viscosity*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.);
00197   }
00198 
00199   if (temperature > 0.0) {  /* fluctuating hydrodynamics ? */
00200 
00201     lbpar_gpu.fluct = 1;
00202         LB_TRACE (fprintf(stderr, "fluct on \n"));
00203     /* Eq. (51) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007).*/
00204     /* Note that the modes are not normalized as in the paper here! */
00205 
00206     lbpar_gpu.mu = (float)temperature/c_sound_sq*lbpar_gpu.tau*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid);
00207     //lbpar_gpu->mu *= agrid*agrid*agrid;  // Marcello's conjecture
00208 
00209     /* lb_coupl_pref is stored in MD units (force)
00210      * Eq. (16) Ahlrichs and Duenweg, JCP 111(17):8225 (1999).
00211      * The factor 12 comes from the fact that we use random numbers
00212      * from -0.5 to 0.5 (equally distributed) which have variance 1/12.
00213      * time_step comes from the discretization.
00214      */
00215 
00216     lbpar_gpu.lb_coupl_pref = sqrt(12.f*2.f*lbpar_gpu.friction*(float)temperature/lbpar_gpu.time_step);
00217     lbpar_gpu.lb_coupl_pref2 = sqrt(2.f*lbpar_gpu.friction*(float)temperature/lbpar_gpu.time_step);
00218 
00219   } else {
00220     /* no fluctuations at zero temperature */
00221     lbpar_gpu.fluct = 0;
00222     lbpar_gpu.lb_coupl_pref = 0.0;
00223     lbpar_gpu.lb_coupl_pref2 = 0.0;
00224   }
00225         LB_TRACE (fprintf(stderr,"lb_reinit_prarameters_gpu \n"));
00226 
00227   reinit_parameters_GPU(&lbpar_gpu);
00228 }
00229 
00230 /** Performs a full initialization of
00231  *  the Lattice Boltzmann system. All derived parameters
00232  *  and the fluid are reset to their default values. */
00233 void lb_init_gpu() {
00234 
00235   LB_TRACE(printf("Begin initialzing fluid on GPU\n"));
00236   /** set parameters for transfer to gpu */
00237   lb_reinit_parameters_gpu();
00238 
00239   lb_realloc_particles_gpu();
00240         
00241   lb_init_GPU(&lbpar_gpu);
00242 
00243   LB_TRACE(printf("Initialzing fluid on GPU successful\n"));
00244 }
00245 
00246 /*@}*/
00247 
00248 /***********************************************************************/
00249 /** \name MPI stuff */
00250 /***********************************************************************/
00251 
00252 /*************** REQ_GETPARTS ************/
00253 static void mpi_get_particles_lb(LB_particle_gpu *host_data)
00254 {
00255   int n_part;
00256   int g, pnode;
00257   Cell *cell;
00258   int c;
00259   MPI_Status status;
00260 
00261   int i;        
00262   int *sizes;
00263   sizes = malloc(sizeof(int)*n_nodes);
00264 
00265   n_part = cells_get_n_particles();
00266 
00267   /* first collect number of particles on each node */
00268   MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);
00269 
00270   /* just check if the number of particles is correct */
00271   if(this_node > 0){
00272     /* call slave functions to provide the slave datas */
00273     mpi_get_particles_slave_lb();
00274   }
00275   else {
00276     /* master: fetch particle informations into 'result' */
00277     g = 0;
00278     for (pnode = 0; pnode < n_nodes; pnode++) {
00279       if (sizes[pnode] > 0) {
00280         if (pnode == 0) {
00281           for (c = 0; c < local_cells.n; c++) {
00282             Particle *part;
00283             int npart;  
00284             int dummy[3] = {0,0,0};
00285             double pos[3];
00286             cell = local_cells.cell[c];
00287             part = cell->part;
00288             npart = cell->n;
00289             for (i=0;i<npart;i++) {
00290               memcpy(pos, part[i].r.p, 3*sizeof(double));
00291               fold_position(pos, dummy);
00292               host_data[i+g].p[0] = (float)pos[0];
00293               host_data[i+g].p[1] = (float)pos[1];
00294               host_data[i+g].p[2] = (float)pos[2];
00295                                                                 
00296               host_data[i+g].v[0] = (float)part[i].m.v[0];
00297               host_data[i+g].v[1] = (float)part[i].m.v[1];
00298               host_data[i+g].v[2] = (float)part[i].m.v[2];
00299 #ifdef LB_ELECTROHYDRODYNAMICS
00300               host_data[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
00301               host_data[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
00302               host_data[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
00303 #endif
00304             }  
00305             g += npart;
00306           }  
00307         }
00308         else {
00309           MPI_Recv(&host_data[g], sizes[pnode]*sizeof(LB_particle_gpu), MPI_BYTE, pnode, REQ_GETPARTS,
00310           comm_cart, &status);
00311           g += sizes[pnode];
00312         }
00313       }
00314     }
00315   }
00316   COMM_TRACE(fprintf(stderr, "%d: finished get\n", this_node));
00317   free(sizes);
00318 }
00319 
00320 static void mpi_get_particles_slave_lb(){
00321  
00322   int n_part;
00323   int g;
00324   LB_particle_gpu *host_data_sl;
00325   Cell *cell;
00326   int c, i;
00327 
00328   n_part = cells_get_n_particles();
00329 
00330   COMM_TRACE(fprintf(stderr, "%d: get_particles_slave, %d particles\n", this_node, n_part));
00331 
00332   if (n_part > 0) {
00333     /* get (unsorted) particle informations as an array of type 'particle' */
00334     /* then get the particle information */
00335     host_data_sl = malloc(n_part*sizeof(LB_particle_gpu));
00336     
00337     g = 0;
00338     for (c = 0; c < local_cells.n; c++) {
00339       Particle *part;
00340       int npart;
00341       int dummy[3] = {0,0,0};
00342       double pos[3];
00343       cell = local_cells.cell[c];
00344       part = cell->part;
00345       npart = cell->n;
00346 
00347       for (i=0;i<npart;i++) {
00348         memcpy(pos, part[i].r.p, 3*sizeof(double));
00349         fold_position(pos, dummy);      
00350                         
00351         host_data_sl[i+g].p[0] = (float)pos[0];
00352         host_data_sl[i+g].p[1] = (float)pos[1];
00353         host_data_sl[i+g].p[2] = (float)pos[2];
00354 
00355         host_data_sl[i+g].v[0] = (float)part[i].m.v[0];
00356         host_data_sl[i+g].v[1] = (float)part[i].m.v[1];
00357         host_data_sl[i+g].v[2] = (float)part[i].m.v[2];
00358 #ifdef LB_ELECTROHYDRODYNAMICS
00359         host_data_sl[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
00360         host_data_sl[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
00361         host_data_sl[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
00362 #endif
00363       }
00364       g+=npart;
00365     }
00366     /* and send it back to the master node */
00367     MPI_Send(host_data_sl, n_part*sizeof(LB_particle_gpu), MPI_BYTE, 0, REQ_GETPARTS, comm_cart);
00368     free(host_data_sl);
00369   }  
00370 }
00371 
00372 static void mpi_send_forces_lb(LB_particle_force_gpu *host_forces){
00373         
00374   int n_part;
00375   int g, pnode;
00376   Cell *cell;
00377   int c;
00378   int i;        
00379   int *sizes;
00380   sizes = malloc(sizeof(int)*n_nodes);
00381   n_part = cells_get_n_particles();
00382   /* first collect number of particles on each node */
00383   MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);
00384 
00385   /* call slave functions to provide the slave datas */
00386   if(this_node > 0) {
00387     mpi_send_forces_slave_lb();
00388   }
00389   else{
00390   /* fetch particle informations into 'result' */
00391   g = 0;
00392     for (pnode = 0; pnode < n_nodes; pnode++) {
00393       if (sizes[pnode] > 0) {
00394         if (pnode == 0) {
00395           for (c = 0; c < local_cells.n; c++) {
00396             int npart;  
00397             cell = local_cells.cell[c];
00398             npart = cell->n;
00399             for (i=0;i<npart;i++) {
00400               cell->part[i].f.f[0] += (double)host_forces[i+g].f[0];
00401               cell->part[i].f.f[1] += (double)host_forces[i+g].f[1];
00402               cell->part[i].f.f[2] += (double)host_forces[i+g].f[2];
00403             }
00404             g += npart;
00405           }
00406         }
00407         else {
00408         /* and send it back to the slave node */
00409         MPI_Send(&host_forces[g], sizes[pnode]*sizeof(LB_particle_force_gpu), MPI_BYTE, pnode, REQ_GETPARTS, comm_cart);                        
00410         g += sizes[pnode];
00411         }
00412       }
00413     }
00414   }
00415   COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));
00416 
00417   free(sizes);
00418 }
00419 
00420 static void mpi_send_forces_slave_lb(){
00421 
00422   int n_part;
00423   LB_particle_force_gpu *host_forces_sl;
00424   Cell *cell;
00425   int c, i;
00426   MPI_Status status;
00427 
00428   n_part = cells_get_n_particles();
00429 
00430   COMM_TRACE(fprintf(stderr, "%d: send_particles_slave, %d particles\n", this_node, n_part));
00431 
00432 
00433   if (n_part > 0) {
00434     int g = 0;
00435     /* get (unsorted) particle informations as an array of type 'particle' */
00436     /* then get the particle information */
00437     host_forces_sl = malloc(n_part*sizeof(LB_particle_force_gpu));
00438     MPI_Recv(host_forces_sl, n_part*sizeof(LB_particle_force_gpu), MPI_BYTE, 0, REQ_GETPARTS,
00439     comm_cart, &status);
00440     for (c = 0; c < local_cells.n; c++) {
00441       int npart;        
00442       cell = local_cells.cell[c];
00443       npart = cell->n;
00444       for (i=0;i<npart;i++) {
00445         cell->part[i].f.f[0] += (double)host_forces_sl[i+g].f[0];
00446         cell->part[i].f.f[1] += (double)host_forces_sl[i+g].f[1];
00447         cell->part[i].f.f[2] += (double)host_forces_sl[i+g].f[2];
00448       }
00449       g += npart;
00450     }
00451     free(host_forces_sl);
00452   } 
00453 }
00454 /*@}*/
00455 
00456 int lb_lbnode_set_extforce_GPU(int ind[3], double f[3])
00457 {
00458   if ( ind[0] < 0 || ind[0] >=  lbpar_gpu.dim_x ||
00459        ind[1] < 0 || ind[1] >= lbpar_gpu.dim_y ||
00460        ind[2] < 0 || ind[2] >= lbpar_gpu.dim_z )
00461     return ES_ERROR;
00462 
00463   unsigned int index =
00464     ind[0] + ind[1]*lbpar_gpu.dim_x + ind[2]*lbpar_gpu.dim_x*lbpar_gpu.dim_y;
00465 
00466   size_t  size_of_extforces = (n_extern_nodeforces+1)*sizeof(LB_extern_nodeforce_gpu);
00467   host_extern_nodeforces = realloc(host_extern_nodeforces, size_of_extforces);
00468   
00469   host_extern_nodeforces[n_extern_nodeforces].force[0] = (float)f[0];
00470   host_extern_nodeforces[n_extern_nodeforces].force[1] = (float)f[1];
00471   host_extern_nodeforces[n_extern_nodeforces].force[2] = (float)f[2];
00472   
00473   host_extern_nodeforces[n_extern_nodeforces].index = index;
00474   n_extern_nodeforces++;
00475   
00476   if(lbpar_gpu.external_force == 0)lbpar_gpu.external_force = 1;
00477 
00478   lb_init_extern_nodeforces_GPU(n_extern_nodeforces, host_extern_nodeforces, &lbpar_gpu);
00479 
00480   return ES_OK;
00481 }
00482 
00483 #endif /* LB_GPU */