49#include "communication.hpp"
53#include "system/System.hpp"
63#include <boost/mpi/collectives/all_reduce.hpp>
64#include <boost/mpi/collectives/reduce.hpp>
78 double local_mu2 = 0.;
80 for (
auto const &p :
get_system().cell_structure->local_particles()) {
82 local_mu2 += p.calc_dip().norm2();
92 int n_c_part,
double sum_q2,
double alpha_L);
95 int n_c_part,
double sum_q2,
102 double sum_q2,
double x1,
double x2,
double xacc,
103 double tuned_accuracy);
105double DipolarP3M::calc_average_self_energy_k_space()
const {
114 boost::mpi::reduce(
comm_cart, node_phi, phi, std::plus<>(), 0);
115 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(
dp3m.
params.
mesh[0]);
125 auto const &box_geo = *system.box_geo;
126 auto const &local_geo = *system.local_geo;
127 auto const verlet_skin = system.cell_structure->get_verlet_skin();
143 val.resize(ca_mesh_size);
155 int tune_timings,
bool tune_verbose)
156 : dp3m{std::move(parameters)}, tune_timings{tune_timings},
157 tune_verbose{tune_verbose} {
164 throw std::domain_error(
"Parameter 'timings' must be > 0");
168 throw std::domain_error(
"DipolarP3M requires a cubic mesh");
176 auto const weights = p3m_calculate_interpolation_weights<cao>(
178 p3m_interpolate<cao>(dp3m.
local_mesh, weights,
179 [&dip, &dp3m](
int ind,
double w) {
180 dp3m.rs_mesh_dip[0][ind] += w * dip[0];
181 dp3m.rs_mesh_dip[1][ind] += w * dip[1];
182 dp3m.rs_mesh_dip[2][ind] += w * dip[2];
198 for (
auto const &p : particles) {
199 if (p.dipm() != 0.) {
201 p.pos(), p.calc_dip());
212 auto p_index = std::size_t{0ul};
214 for (
auto &p : particles) {
215 if (p.dipm() != 0.) {
220 [&E, &dp3m, d_rs](
int ind,
double w) {
221 E[d_rs] += w * dp3m.rs_mesh[ind];
236 auto p_index = std::size_t{0ul};
238 for (
auto &p : particles) {
239 if (p.dipm() != 0.) {
244 E[0] += w * dp3m.rs_mesh_dip[0][ind];
245 E[1] += w * dp3m.rs_mesh_dip[1][ind];
246 E[2] += w * dp3m.rs_mesh_dip[2][ind];
249 p.force()[d_rs] += p.calc_dip() * prefac * E;
262 auto const &box_geo = *system.box_geo;
265 auto const npt_flag =
268 auto constexpr npt_flag =
false;
290 if (energy_flag or npt_flag) {
300 double node_energy = 0.0;
325 node_energy *= dipole_prefac *
Utils::pi() * box_geo.length_inv()[0];
326 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
329 calc_energy_correction();
348 auto const two_pi_L_i = 2. *
Utils::pi() * box_geo.length_inv()[0];
387 for (
int d = 0; d < 3; d++) {
411 Utils::integral_parameter<int, AssignTorques, 1, 7>(
453 for (
int d = 0; d < 3; d++) {
498 Utils::integral_parameter<int, AssignForces, 1, 7>(
506 auto const surface_term =
507 calc_surface_term(force_flag, energy_flag or npt_flag, particles);
509 energy += surface_term;
514 fprintf(stderr,
"dipolar_P3M at this moment is added to p_vir[0]\n");
516 if (not energy_flag) {
523double DipolarP3M::calc_surface_term(
bool force_flag,
bool energy_flag,
528 auto const n_local_part = particles.
size();
532 std::vector<double> mx(n_local_part);
533 std::vector<double> my(n_local_part);
534 std::vector<double> mz(n_local_part);
537 for (
auto const &p : particles) {
538 auto const dip = p.calc_dip();
547 for (std::size_t i = 0
u; i < n_local_part; i++) {
548 local_dip[0] += mx[i];
549 local_dip[1] += my[i];
550 local_dip[2] += mz[i];
553 boost::mpi::all_reduce(
comm_cart, local_dip, std::plus<>());
558 for (std::size_t i = 0
u; i < n_local_part; i++) {
559 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
562 0.5 * pref * boost::mpi::all_reduce(
comm_cart, sum_e, std::plus<>());
567 std::vector<double> sumix(n_local_part);
568 std::vector<double> sumiy(n_local_part);
569 std::vector<double> sumiz(n_local_part);
571 for (std::size_t i = 0
u; i < n_local_part; i++) {
572 sumix[i] = my[i] * box_dip[2] - mz[i] * box_dip[1];
573 sumiy[i] = mz[i] * box_dip[0] - mx[i] * box_dip[2];
574 sumiz[i] = mx[i] * box_dip[1] - my[i] * box_dip[0];
579 auto &
torque = p.torque();
580 torque[0] -= pref * sumix[ip];
581 torque[1] -= pref * sumiy[ip];
582 torque[2] -= pref * sumiz[ip];
590void DipolarP3M::calc_influence_function_force() {
598void DipolarP3M::calc_influence_function_energy() {
608 int m_mesh_max = -1, m_mesh_min = -1;
612 double prefactor,
int timings)
619 std::optional<std::string>
626 m_logger = std::make_unique<TuningLogger>(
634 std::tuple<double, double, double, double>
636 double r_cut_iL)
const override {
638 double alpha_L, rs_err, ks_err;
650 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
673 auto const expo = std::log(std::cbrt(dp3m.
sum_dip_part)) / std::log(2.);
675 m_mesh_min =
static_cast<int>(std::round(std::pow(2., std::floor(expo))));
679 m_mesh_min = m_mesh_max = dp3m.
params.
mesh[0];
687 for (
auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
692 auto const trial_time =
693 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
694 trial_params.alpha_L, trial_params.accuracy);
704 if (trial_time < time_best) {
707 tuned_params = trial_params;
708 time_best = tuned_params.time = trial_time;
721 auto const &box_geo = *system.box_geo;
731 throw std::runtime_error(
732 "DipolarP3M: no dipolar particles in the system");
744 system.on_dipoles_change();
755 double mesh_i,
int cao,
double alpha_L_i) {
763 auto const nmx = nx + mx * mesh;
764 auto const fnmx = mesh_i * nmx;
766 auto const nmy = ny + my * mesh;
767 auto const fnmy = mesh_i * nmy;
769 auto const nmz = nz + mz * mesh;
770 auto const fnmz = mesh_i * nmz;
773 auto const ex = std::exp(-factor1 * nm2);
775 auto const U2 = pow(sinc(fnmx) * sinc(fnmy) * sinc(fnmz), 2. * cao);
778 alias2 += U2 * ex * pow((nx * nmx + ny * nmy + nz * nmz), 3.) / nm2;
782 return std::make_pair(alias1, alias2);
787 int n_c_part,
double sum_q2,
double alpha_L) {
789 auto const mesh_i = 1. / mesh;
790 auto const alpha_L_i = 1. / alpha_L;
792 for (
int nx = -mesh / 2; nx < mesh / 2; nx++)
793 for (
int ny = -mesh / 2; ny < mesh / 2; ny++)
794 for (
int nz = -mesh / 2; nz < mesh / 2; nz++)
795 if ((nx != 0) || (ny != 0) || (nz != 0)) {
800 auto const [alias1, alias2] =
804 Utils::int_pow<3>(
static_cast<double>(n2));
812 Utils::int_pow<4>(box_size);
822 int n_c_part,
double sum_q2,
824 double d_error_f, d_cc, d_dc, d_con;
826 auto const d_rcut = r_cut_iL * box_size;
832 auto const d_c = sum_q2 * exp(-d_a2 * d_rcut2);
836 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
837 20. *
Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
839 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) *
Utils::sqr(d_a2) * d_rcut *
840 Utils::sqr(d_rcut4) *
static_cast<double>(n_c_part));
842 d_error_f = d_c * d_con *
844 (2. / 15.) *
Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
854 double sum_q2,
double x1,
double x2,
double xacc,
855 double tuned_accuracy) {
856 constexpr int JJ_RTBIS_MAX = 40;
866 if (f1 * f2 >= 0.0) {
867 throw std::runtime_error(
868 "Root must be bracketed for bisection in dp3m_rtbisection");
872 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
873 for (
int j = 1; j <= JJ_RTBIS_MAX; j++) {
874 auto const xmid = rtb + (dx *= 0.5);
880 if (fabs(dx) < xacc || fmid == 0.0)
883 throw std::runtime_error(
"Too many bisections in dp3m_rtbisection");
886void DipolarP3M::sanity_checks_boxl()
const {
888 auto const &box_geo = *system.box_geo;
889 auto const &local_geo = *system.local_geo;
890 for (
unsigned int i = 0
u; i < 3u; i++) {
893 std::stringstream msg;
895 <<
" is larger than half of box dimension " << box_geo.length()[i];
896 throw std::runtime_error(msg.str());
899 std::stringstream msg;
901 <<
" is larger than local box dimension " << local_geo.length()[i];
902 throw std::runtime_error(msg.str());
906 if ((box_geo.length()[0] != box_geo.length()[1]) or
907 (box_geo.length()[1] != box_geo.length()[2])) {
908 throw std::runtime_error(
"DipolarP3M: requires a cubic box");
912void DipolarP3M::sanity_checks_periodicity()
const {
914 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
915 throw std::runtime_error(
916 "DipolarP3M: requires periodicity (True, True, True)");
920void DipolarP3M::sanity_checks_cell_structure()
const {
921 auto const &local_geo = *
get_system().local_geo;
924 throw std::runtime_error(
925 "DipolarP3M: requires the regular or hybrid decomposition cell system");
929 throw std::runtime_error(
930 "DipolarP3M: does not work with the hybrid decomposition cell system, "
931 "if using more than one MPI node");
935void DipolarP3M::sanity_checks_node_grid()
const {
937 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
938 throw std::runtime_error(
939 "DipolarP3M: node grid must be sorted, largest first");
943void DipolarP3M::scaleby_box_l() {
949 sanity_checks_boxl();
950 calc_influence_function_force();
951 calc_influence_function_energy();
955void DipolarP3M::calc_energy_correction() {
957 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
__global__ float float * torque
std::optional< std::string > layer_correction_veto_r_cut(double) const override
TuningAlgorithm::Parameters get_time() override
void setup_logger(bool verbose) override
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
P3MParameters & get_params() override
DipolarTuningAlgorithm(System::System &system, dp3m_data_struct &input_dp3m, double prefactor, int timings)
void determine_mesh_limits() override
void on_solver_change() const override
void set_prefactor(double new_prefactor)
double prefactor
Magnetostatics prefactor.
base_type::size_type size() const
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
static Vector< T, N > broadcast(T const &s)
Create a vector that has all entries set to one value.
void store(const InterpolationWeights< cao > &w)
Push back weights for one point.
InterpolationWeights< cao > load(std::size_t i) const
Load entry from the cache.
void reset(int cao)
Reset the cache.
void spread_grid(Utils::Span< double * > meshes, const boost::mpi::communicator &comm, const Utils::Vector3i &dim)
void resize(const boost::mpi::communicator &comm, const P3MLocalMesh &local_mesh)
void gather_grid(Utils::Span< double * > meshes, const boost::mpi::communicator &comm, const Utils::Vector3i &dim)
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
__device__ void vector_product(float const *a, float const *b, float *out)
static double dp3m_k_space_error(double box_size, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
void npt_add_virial_magnetic_contribution(double energy)
Update the NpT virial.
double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
static auto dp3m_tune_aliasing_sums(int nx, int ny, int nz, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
static double dp3m_real_space_error(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
P3M algorithm for long-range magnetic dipole-dipole interaction.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
void fft_perform_forw(double *data, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place forward 3D FFT.
void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place backward 3D FFT.
int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, fft_data_struct &fft, Utils::Vector3i const &grid, boost::mpi::communicator const &comm)
Initialize everything connected to the 3D-FFT.
Routines, row decomposition, data structures and communication for the 3D-FFT.
double grid_influence_function_self_energy(P3MParameters const ¶ms, Utils::Vector3i const &n_start, Utils::Vector3i const &n_end, std::vector< double > const &g)
Calculate self-energy of the influence function.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
ParticleRange particles(Utils::Span< Cell *const > cells)
DEVICE_QUALIFIER constexpr T pi()
Ratio of diameter and circumference of a circle.
DEVICE_QUALIFIER T sinc(T d)
Calculates the sinc-function as sin(PI*x)/(PI*x).
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
DEVICE_QUALIFIER constexpr Span< T > make_span(T *p, std::size_t N)
DEVICE_QUALIFIER constexpr T sqrt_2()
Square root of 2.
DEVICE_QUALIFIER constexpr T sqrt_pi_i()
One over square root of pi.
void npt_add_virial_contribution(double energy)
Exports for the NpT code.
static __device__ double p3m_analytic_cotangent_sum(int n, double mesh_i)
Utils::Vector3i node_grid
void init()
Recalculate all derived parameters.
int tune_timings
Magnetostatics prefactor.
void dipole_assign(ParticleRange const &particles)
Assign the physical dipoles using the tabulated assignment function.
dp3m_data_struct dp3m
Dipolar P3M parameters.
DipolarP3M(P3MParameters &¶meters, double prefactor, int tune_timings, bool tune_verbose)
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
void count_magnetic_particles()
Count the number of magnetic particles and calculate the sum of the squared dipole moments.
void tune()
Tune dipolar P3M parameters to desired accuracy.
Utils::Vector3i dim
dimension (size) of local mesh.
int size
number of local mesh points.
void recalc_ld_pos(P3MParameters const ¶ms)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
void calc_local_ca_mesh(P3MParameters const ¶ms, LocalBox const &local_geo, double skin, double space_layer)
Calculate properties of the local FFT mesh for the charge assignment process.
int margin[6]
number of margin mesh points.
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
int cao3
number of points unto which a single charge is interpolated, i.e.
Utils::Vector3d mesh_off
offset of the first mesh point (lower left corner) from the coordinate origin ([0,...
Utils::Vector3d ai
inverse mesh constant.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0).
double epsilon
epsilon of the "surrounding dielectric".
void operator()(dp3m_data_struct &dp3m, Utils::Vector3d const &real_pos, Utils::Vector3d const &dip) const
void operator()(dp3m_data_struct const &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
void operator()(dp3m_data_struct const &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
p3m_send_mesh sm
send/recv mesh sizes
P3MLocalMesh local_mesh
local mesh.
fft_vector< double > rs_mesh
real space mesh (local) for CA/FFT.
double sum_mu2
Sum of square of magnetic dipoles (only on head node).
std::array< fft_vector< double >, 3 > rs_mesh_dip
real space mesh (local) for CA/FFT of the dipolar field.
int sum_dip_part
number of dipolar particles (only on head node).
p3m_interpolation_cache inter_weights
std::vector< double > ks_mesh
k-space mesh (local) for k-space calculation and FFT.
double energy_correction
cached k-space self-energy correction
fft_forw_plan plan[4]
Information for forward FFTs.
int start[3]
lower left point of local FFT mesh in global FFT mesh coordinates.
int new_mesh[3]
size of local mesh after communication, also used for actual FFT.
std::vector< double > g_energy
Energy optimised influence function (k-space)
void calc_differential_operator()
Calculate the Fourier transformed differential operator.
std::array< std::vector< int >, 3 > d_op
Spatial differential operator in k-space.
std::vector< double > g_force
Force optimised influence function (k-space)
int ks_pnum
number of permutations in k_space