![]() |
ESPResSo 3.2.0-11-g9950804-git
Extensible Simulation Package for Soft Matter Research
|
00001 /* 00002 Copyright (C) 2010,2011,2012,2013 The ESPResSo project 00003 00004 This file is part of ESPResSo. 00005 00006 ESPResSo is free software: you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation, either version 3 of the License, or 00009 (at your option) any later version. 00010 00011 ESPResSo is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 */ 00019 00020 /** \file lbgpu_cfile.c 00021 * 00022 * C file for the Lattice Boltzmann implementation on GPUs. 00023 * Header file for \ref lbgpu.h. 00024 */ 00025 #include <mpi.h> 00026 #include <stdio.h> 00027 #include <math.h> 00028 #include <stdlib.h> 00029 #include <time.h> 00030 #include <string.h> 00031 #include "lbgpu.h" 00032 #include "utils.h" 00033 #include "communication.h" 00034 #include "thermostat.h" 00035 #include "grid.h" 00036 #include "domain_decomposition.h" 00037 #include "integrate.h" 00038 #include "interaction_data.h" 00039 #include "particle_data.h" 00040 #include "global.h" 00041 #include "lb-boundaries.h" 00042 #ifdef LB_GPU 00043 00044 /** Action number for \ref mpi_get_particles. */ 00045 #define REQ_GETPARTS 16 00046 #ifndef D3Q19 00047 #error The implementation only works for D3Q19 so far! 00048 #endif 00049 00050 /** Struct holding the Lattice Boltzmann parameters */ 00051 LB_parameters_gpu lbpar_gpu = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 1.0 ,0.0, -1.0, 0, 0, 0, 0, 0, 0, 1, 0, {0.0, 0.0, 0.0}, 12345, 0}; 00052 LB_values_gpu *host_values = NULL; 00053 LB_nodes_gpu *host_nodes = NULL; 00054 LB_particle_force_gpu *host_forces = NULL; 00055 LB_particle_gpu *host_data = NULL; 00056 00057 /** Flag indicating momentum exchange between particles and fluid */ 00058 int transfer_momentum_gpu = 0; 00059 00060 static int max_ran = 1000000; 00061 /*@}*/ 00062 //static double tau; 00063 00064 /** measures the MD time since the last fluid update */ 00065 static int fluidstep = 0; 00066 00067 /** c_sound_square in LB units*/ 00068 static float c_sound_sq = 1.f/3.f; 00069 00070 //clock_t start, end; 00071 int i; 00072 00073 static void mpi_get_particles_lb(LB_particle_gpu *host_result); 00074 static void mpi_get_particles_slave_lb(); 00075 static void mpi_send_forces_lb(LB_particle_force_gpu *host_forces); 00076 static void mpi_send_forces_slave_lb(); 00077 00078 int n_extern_nodeforces = 0; 00079 LB_extern_nodeforce_gpu *host_extern_nodeforces = NULL; 00080 00081 /*-----------------------------------------------------------*/ 00082 /** main of lb_gpu_programm */ 00083 /*-----------------------------------------------------------*/ 00084 /** lattice boltzmann update gpu called from integrate.c 00085 */ 00086 void lattice_boltzmann_update_gpu() { 00087 00088 int factor = (int)round(lbpar_gpu.tau/time_step); 00089 00090 fluidstep += 1; 00091 00092 if (fluidstep>=factor) { 00093 fluidstep=0; 00094 00095 lb_integrate_GPU(); 00096 00097 LB_TRACE (fprintf(stderr,"lb_integrate_GPU \n")); 00098 00099 } 00100 } 00101 00102 /** Calculate particle lattice interactions called from forces.c 00103 */ 00104 void lb_calc_particle_lattice_ia_gpu() { 00105 00106 if (transfer_momentum_gpu) { 00107 mpi_get_particles_lb(host_data); 00108 00109 if(this_node == 0){ 00110 #if 0 00111 for (i=0;i<n_total_particles;i++) { 00112 fprintf(stderr, "%i particle posi: , %f %f %f\n", i, host_data[i].p[0], host_data[i].p[1], host_data[i].p[2]); 00113 } 00114 #endif 00115 00116 if(lbpar_gpu.number_of_particles) lb_particle_GPU(host_data); 00117 00118 LB_TRACE (fprintf(stderr,"lb_calc_particle_lattice_ia_gpu \n")); 00119 00120 } 00121 } 00122 } 00123 00124 /**copy forces from gpu to cpu and call mpi routines to add forces to particles 00125 */ 00126 void lb_send_forces_gpu(){ 00127 00128 if (transfer_momentum_gpu) { 00129 if(this_node == 0){ 00130 if (lbpar_gpu.number_of_particles) lb_copy_forces_GPU(host_forces); 00131 00132 LB_TRACE (fprintf(stderr,"lb_send_forces_gpu \n")); 00133 #if 0 00134 for (i=0;i<n_total_particles;i++) { 00135 fprintf(stderr, "%i particle forces , %f %f %f \n", i, host_forces[i].f[0], host_forces[i].f[1], host_forces[i].f[2]); 00136 } 00137 #endif 00138 } 00139 mpi_send_forces_lb(host_forces); 00140 } 00141 } 00142 00143 /** (re-) allocation of the memory need for the particles (cpu part)*/ 00144 void lb_realloc_particles_gpu(){ 00145 00146 lbpar_gpu.number_of_particles = n_total_particles; 00147 LB_TRACE (printf("#particles realloc\t %u \n", lbpar_gpu.number_of_particles)); 00148 //fprintf(stderr, "%u \t \n", lbpar_gpu.number_of_particles); 00149 /**-----------------------------------------------------*/ 00150 /** allocating of the needed memory for several structs */ 00151 /**-----------------------------------------------------*/ 00152 /**Allocate struct for particle forces */ 00153 size_t size_of_forces = lbpar_gpu.number_of_particles * sizeof(LB_particle_force_gpu); 00154 host_forces = realloc(host_forces, size_of_forces); 00155 00156 lbpar_gpu.your_seed = (unsigned int)i_random(max_ran); 00157 00158 LB_TRACE (fprintf(stderr,"test your_seed %u \n", lbpar_gpu.your_seed)); 00159 lb_realloc_particle_GPU(&lbpar_gpu, &host_data); 00160 } 00161 /** (Re-)initializes the fluid according to the given value of rho. */ 00162 void lb_reinit_fluid_gpu() { 00163 00164 //lbpar_gpu.your_seed = (unsigned int)i_random(max_ran); 00165 lb_reinit_parameters_gpu(); 00166 if(lbpar_gpu.number_of_nodes != 0){ 00167 lb_reinit_GPU(&lbpar_gpu); 00168 lbpar_gpu.reinit = 1; 00169 } 00170 00171 LB_TRACE (fprintf(stderr,"lb_reinit_fluid_gpu \n")); 00172 } 00173 00174 /** Release the fluid. */ 00175 /*not needed in Espresso but still not deleted*/ 00176 void lb_release_gpu(){ 00177 00178 free(host_nodes); 00179 free(host_values); 00180 free(host_forces); 00181 free(host_data); 00182 } 00183 /** (Re-)initializes the fluid. */ 00184 void lb_reinit_parameters_gpu() { 00185 00186 lbpar_gpu.mu = 0.0; 00187 lbpar_gpu.time_step = (float)time_step; 00188 00189 if (lbpar_gpu.viscosity > 0.0) { 00190 /* Eq. (80) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ 00191 lbpar_gpu.gamma_shear = 1. - 2./(6.*lbpar_gpu.viscosity*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); 00192 } 00193 00194 if (lbpar_gpu.bulk_viscosity > 0.0) { 00195 /* Eq. (81) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ 00196 lbpar_gpu.gamma_bulk = 1. - 2./(9.*lbpar_gpu.bulk_viscosity*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); 00197 } 00198 00199 if (temperature > 0.0) { /* fluctuating hydrodynamics ? */ 00200 00201 lbpar_gpu.fluct = 1; 00202 LB_TRACE (fprintf(stderr, "fluct on \n")); 00203 /* Eq. (51) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007).*/ 00204 /* Note that the modes are not normalized as in the paper here! */ 00205 00206 lbpar_gpu.mu = (float)temperature/c_sound_sq*lbpar_gpu.tau*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid); 00207 //lbpar_gpu->mu *= agrid*agrid*agrid; // Marcello's conjecture 00208 00209 /* lb_coupl_pref is stored in MD units (force) 00210 * Eq. (16) Ahlrichs and Duenweg, JCP 111(17):8225 (1999). 00211 * The factor 12 comes from the fact that we use random numbers 00212 * from -0.5 to 0.5 (equally distributed) which have variance 1/12. 00213 * time_step comes from the discretization. 00214 */ 00215 00216 lbpar_gpu.lb_coupl_pref = sqrt(12.f*2.f*lbpar_gpu.friction*(float)temperature/lbpar_gpu.time_step); 00217 lbpar_gpu.lb_coupl_pref2 = sqrt(2.f*lbpar_gpu.friction*(float)temperature/lbpar_gpu.time_step); 00218 00219 } else { 00220 /* no fluctuations at zero temperature */ 00221 lbpar_gpu.fluct = 0; 00222 lbpar_gpu.lb_coupl_pref = 0.0; 00223 lbpar_gpu.lb_coupl_pref2 = 0.0; 00224 } 00225 LB_TRACE (fprintf(stderr,"lb_reinit_prarameters_gpu \n")); 00226 00227 reinit_parameters_GPU(&lbpar_gpu); 00228 } 00229 00230 /** Performs a full initialization of 00231 * the Lattice Boltzmann system. All derived parameters 00232 * and the fluid are reset to their default values. */ 00233 void lb_init_gpu() { 00234 00235 LB_TRACE(printf("Begin initialzing fluid on GPU\n")); 00236 /** set parameters for transfer to gpu */ 00237 lb_reinit_parameters_gpu(); 00238 00239 lb_realloc_particles_gpu(); 00240 00241 lb_init_GPU(&lbpar_gpu); 00242 00243 LB_TRACE(printf("Initialzing fluid on GPU successful\n")); 00244 } 00245 00246 /*@}*/ 00247 00248 /***********************************************************************/ 00249 /** \name MPI stuff */ 00250 /***********************************************************************/ 00251 00252 /*************** REQ_GETPARTS ************/ 00253 static void mpi_get_particles_lb(LB_particle_gpu *host_data) 00254 { 00255 int n_part; 00256 int g, pnode; 00257 Cell *cell; 00258 int c; 00259 MPI_Status status; 00260 00261 int i; 00262 int *sizes; 00263 sizes = malloc(sizeof(int)*n_nodes); 00264 00265 n_part = cells_get_n_particles(); 00266 00267 /* first collect number of particles on each node */ 00268 MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart); 00269 00270 /* just check if the number of particles is correct */ 00271 if(this_node > 0){ 00272 /* call slave functions to provide the slave datas */ 00273 mpi_get_particles_slave_lb(); 00274 } 00275 else { 00276 /* master: fetch particle informations into 'result' */ 00277 g = 0; 00278 for (pnode = 0; pnode < n_nodes; pnode++) { 00279 if (sizes[pnode] > 0) { 00280 if (pnode == 0) { 00281 for (c = 0; c < local_cells.n; c++) { 00282 Particle *part; 00283 int npart; 00284 int dummy[3] = {0,0,0}; 00285 double pos[3]; 00286 cell = local_cells.cell[c]; 00287 part = cell->part; 00288 npart = cell->n; 00289 for (i=0;i<npart;i++) { 00290 memcpy(pos, part[i].r.p, 3*sizeof(double)); 00291 fold_position(pos, dummy); 00292 host_data[i+g].p[0] = (float)pos[0]; 00293 host_data[i+g].p[1] = (float)pos[1]; 00294 host_data[i+g].p[2] = (float)pos[2]; 00295 00296 host_data[i+g].v[0] = (float)part[i].m.v[0]; 00297 host_data[i+g].v[1] = (float)part[i].m.v[1]; 00298 host_data[i+g].v[2] = (float)part[i].m.v[2]; 00299 #ifdef LB_ELECTROHYDRODYNAMICS 00300 host_data[i+g].mu_E[0] = (float)part[i].p.mu_E[0]; 00301 host_data[i+g].mu_E[1] = (float)part[i].p.mu_E[1]; 00302 host_data[i+g].mu_E[2] = (float)part[i].p.mu_E[2]; 00303 #endif 00304 } 00305 g += npart; 00306 } 00307 } 00308 else { 00309 MPI_Recv(&host_data[g], sizes[pnode]*sizeof(LB_particle_gpu), MPI_BYTE, pnode, REQ_GETPARTS, 00310 comm_cart, &status); 00311 g += sizes[pnode]; 00312 } 00313 } 00314 } 00315 } 00316 COMM_TRACE(fprintf(stderr, "%d: finished get\n", this_node)); 00317 free(sizes); 00318 } 00319 00320 static void mpi_get_particles_slave_lb(){ 00321 00322 int n_part; 00323 int g; 00324 LB_particle_gpu *host_data_sl; 00325 Cell *cell; 00326 int c, i; 00327 00328 n_part = cells_get_n_particles(); 00329 00330 COMM_TRACE(fprintf(stderr, "%d: get_particles_slave, %d particles\n", this_node, n_part)); 00331 00332 if (n_part > 0) { 00333 /* get (unsorted) particle informations as an array of type 'particle' */ 00334 /* then get the particle information */ 00335 host_data_sl = malloc(n_part*sizeof(LB_particle_gpu)); 00336 00337 g = 0; 00338 for (c = 0; c < local_cells.n; c++) { 00339 Particle *part; 00340 int npart; 00341 int dummy[3] = {0,0,0}; 00342 double pos[3]; 00343 cell = local_cells.cell[c]; 00344 part = cell->part; 00345 npart = cell->n; 00346 00347 for (i=0;i<npart;i++) { 00348 memcpy(pos, part[i].r.p, 3*sizeof(double)); 00349 fold_position(pos, dummy); 00350 00351 host_data_sl[i+g].p[0] = (float)pos[0]; 00352 host_data_sl[i+g].p[1] = (float)pos[1]; 00353 host_data_sl[i+g].p[2] = (float)pos[2]; 00354 00355 host_data_sl[i+g].v[0] = (float)part[i].m.v[0]; 00356 host_data_sl[i+g].v[1] = (float)part[i].m.v[1]; 00357 host_data_sl[i+g].v[2] = (float)part[i].m.v[2]; 00358 #ifdef LB_ELECTROHYDRODYNAMICS 00359 host_data_sl[i+g].mu_E[0] = (float)part[i].p.mu_E[0]; 00360 host_data_sl[i+g].mu_E[1] = (float)part[i].p.mu_E[1]; 00361 host_data_sl[i+g].mu_E[2] = (float)part[i].p.mu_E[2]; 00362 #endif 00363 } 00364 g+=npart; 00365 } 00366 /* and send it back to the master node */ 00367 MPI_Send(host_data_sl, n_part*sizeof(LB_particle_gpu), MPI_BYTE, 0, REQ_GETPARTS, comm_cart); 00368 free(host_data_sl); 00369 } 00370 } 00371 00372 static void mpi_send_forces_lb(LB_particle_force_gpu *host_forces){ 00373 00374 int n_part; 00375 int g, pnode; 00376 Cell *cell; 00377 int c; 00378 int i; 00379 int *sizes; 00380 sizes = malloc(sizeof(int)*n_nodes); 00381 n_part = cells_get_n_particles(); 00382 /* first collect number of particles on each node */ 00383 MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart); 00384 00385 /* call slave functions to provide the slave datas */ 00386 if(this_node > 0) { 00387 mpi_send_forces_slave_lb(); 00388 } 00389 else{ 00390 /* fetch particle informations into 'result' */ 00391 g = 0; 00392 for (pnode = 0; pnode < n_nodes; pnode++) { 00393 if (sizes[pnode] > 0) { 00394 if (pnode == 0) { 00395 for (c = 0; c < local_cells.n; c++) { 00396 int npart; 00397 cell = local_cells.cell[c]; 00398 npart = cell->n; 00399 for (i=0;i<npart;i++) { 00400 cell->part[i].f.f[0] += (double)host_forces[i+g].f[0]; 00401 cell->part[i].f.f[1] += (double)host_forces[i+g].f[1]; 00402 cell->part[i].f.f[2] += (double)host_forces[i+g].f[2]; 00403 } 00404 g += npart; 00405 } 00406 } 00407 else { 00408 /* and send it back to the slave node */ 00409 MPI_Send(&host_forces[g], sizes[pnode]*sizeof(LB_particle_force_gpu), MPI_BYTE, pnode, REQ_GETPARTS, comm_cart); 00410 g += sizes[pnode]; 00411 } 00412 } 00413 } 00414 } 00415 COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node)); 00416 00417 free(sizes); 00418 } 00419 00420 static void mpi_send_forces_slave_lb(){ 00421 00422 int n_part; 00423 LB_particle_force_gpu *host_forces_sl; 00424 Cell *cell; 00425 int c, i; 00426 MPI_Status status; 00427 00428 n_part = cells_get_n_particles(); 00429 00430 COMM_TRACE(fprintf(stderr, "%d: send_particles_slave, %d particles\n", this_node, n_part)); 00431 00432 00433 if (n_part > 0) { 00434 int g = 0; 00435 /* get (unsorted) particle informations as an array of type 'particle' */ 00436 /* then get the particle information */ 00437 host_forces_sl = malloc(n_part*sizeof(LB_particle_force_gpu)); 00438 MPI_Recv(host_forces_sl, n_part*sizeof(LB_particle_force_gpu), MPI_BYTE, 0, REQ_GETPARTS, 00439 comm_cart, &status); 00440 for (c = 0; c < local_cells.n; c++) { 00441 int npart; 00442 cell = local_cells.cell[c]; 00443 npart = cell->n; 00444 for (i=0;i<npart;i++) { 00445 cell->part[i].f.f[0] += (double)host_forces_sl[i+g].f[0]; 00446 cell->part[i].f.f[1] += (double)host_forces_sl[i+g].f[1]; 00447 cell->part[i].f.f[2] += (double)host_forces_sl[i+g].f[2]; 00448 } 00449 g += npart; 00450 } 00451 free(host_forces_sl); 00452 } 00453 } 00454 /*@}*/ 00455 00456 int lb_lbnode_set_extforce_GPU(int ind[3], double f[3]) 00457 { 00458 if ( ind[0] < 0 || ind[0] >= lbpar_gpu.dim_x || 00459 ind[1] < 0 || ind[1] >= lbpar_gpu.dim_y || 00460 ind[2] < 0 || ind[2] >= lbpar_gpu.dim_z ) 00461 return ES_ERROR; 00462 00463 unsigned int index = 00464 ind[0] + ind[1]*lbpar_gpu.dim_x + ind[2]*lbpar_gpu.dim_x*lbpar_gpu.dim_y; 00465 00466 size_t size_of_extforces = (n_extern_nodeforces+1)*sizeof(LB_extern_nodeforce_gpu); 00467 host_extern_nodeforces = realloc(host_extern_nodeforces, size_of_extforces); 00468 00469 host_extern_nodeforces[n_extern_nodeforces].force[0] = (float)f[0]; 00470 host_extern_nodeforces[n_extern_nodeforces].force[1] = (float)f[1]; 00471 host_extern_nodeforces[n_extern_nodeforces].force[2] = (float)f[2]; 00472 00473 host_extern_nodeforces[n_extern_nodeforces].index = index; 00474 n_extern_nodeforces++; 00475 00476 if(lbpar_gpu.external_force == 0)lbpar_gpu.external_force = 1; 00477 00478 lb_init_extern_nodeforces_GPU(n_extern_nodeforces, host_extern_nodeforces, &lbpar_gpu); 00479 00480 return ES_OK; 00481 } 00482 00483 #endif /* LB_GPU */
1.7.5.1