22 #include <cuComplex.h> 31 #ifndef FINITE_DIFFERENCE 46 #ifdef LOG_KRYLOV_AND_STEPSIZES 47 extern __device__
double err_log[
MAX_STEPS];
51 extern __device__
double t_log[
MAX_STEPS];
52 extern __device__
double h_log[
MAX_STEPS];
53 extern __device__
bool reject_log[
MAX_STEPS];
54 extern __device__
int num_integrator_steps;
56 #ifdef DIVERGENCE_TEST 73 __device__
void integrate (
const double t_start,
const double t_end,
const double pr,
74 double* __restrict__ y,
const mechanism_memory* __restrict__ mech,
78 #ifdef CONST_TIME_STEP 79 double h = t_end - t_start;
81 double h = fmin(1.0e-8, t_end - t_start);
93 double *
const __restrict__ sc = solver->sc;
96 #ifdef LOG_KRYLOV_AND_STEPSIZES 99 num_integrator_steps = 0;
106 double *
const __restrict__ work1 = solver->work1;
107 double *
const __restrict__ work2 = solver->work2;
108 double *
const __restrict__ y1 = solver->work3;
109 cuDoubleComplex *
const __restrict__ work4 = solver->work4;
110 double *
const __restrict__ fy = mech->dy;
111 double *
const __restrict__ A = mech->jac;
112 double *
const __restrict__ Vm = solver->Vm;
113 double *
const __restrict__ phiHm = solver->phiHm;
114 double *
const __restrict__ savedActions = solver->savedActions;
115 double *
const __restrict__ gy = solver->gy;
116 int *
const __restrict__ result = solver->result;
119 double * in[5] = {0, 0, 0, savedActions, y};
120 double * out[3] = {0, 0, work1};
121 double scale_vec[3] = {0, 0, 0};
146 dydt (t, pr, y, fy, mech);
147 #ifdef FINITE_DIFFERENCE 155 for (
int i = 0; i <
NSP; ++i) {
160 #ifdef DIVERGENCE_TEST 163 int m =
arnoldi(0.5, 1, h, A, solver, fy, &beta, work2, work4);
164 if (m + 1 >=
STRIDE || m < 0)
194 for (
int i = 0; i <
NSP; ++i) {
204 int m1 =
arnoldi(1.0, 4, h, A, solver, work1, &beta, work2, work4);
205 if (m1 + 4 >=
STRIDE || m1 < 0)
222 scale_vec[0] = beta / (h * h);
223 scale_vec[1] = beta / (h * h * h);
234 for (
int i = 0; i <
NSP; ++i) {
240 int m2 =
arnoldi(1.0, 4, h, A, solver, work1, &beta, work2, work4);
241 if (m2 + 4 >=
STRIDE || m2 < 0)
253 scale_vec[0] = beta / (h * h);
254 scale_vec[1] = beta / (h * h * h);
259 for (
int i = 0; i <
NSP; ++i) {
268 scale (y, y1, work2);
272 h_new = pow(err, -1.0 /
ORD);
274 #ifdef LOG_KRYLOV_AND_STEPSIZES 275 if (
T_ID == 0 && num_integrator_steps >= 0) {
276 err_log[num_integrator_steps] = err;
277 m_log[num_integrator_steps] = m;
278 m1_log[num_integrator_steps] = m1;
279 m2_log[num_integrator_steps] = m2;
280 t_log[num_integrator_steps] = t;
281 h_log[num_integrator_steps] = h;
282 reject_log[num_integrator_steps] = err > 1.0;
283 num_integrator_steps++;
286 printf(
"Number of steps out of bounds! Overwriting\n");
287 num_integrator_steps = -1;
292 #ifndef CONST_TIME_STEP 297 for (
int i = 0; i <
NSP; ++i)
305 h_new = fmin(h_new, (h / h_old) * pow((err_old / (err * err)), (1.0 /
ORD)));
308 h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
311 err_old = fmax(1.0e-2, err);
316 h_new = fmin(h, h_new);
319 h = fmin(h_new, t_end - t);
323 h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
324 h_new = fmin(h_new, t_end - t);
333 for (
int i = 0; i <
NSP; ++i)
#define T_ID
The global CUDA thread index.
__device__ void integrate(const double t_start, const double t_end, const double pr, double *__restrict__ y, const mechanism_memory *__restrict__ mech, const solver_memory *__restrict__ solver)
__device__ void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Get scaling for weighted norm for the initial timestep (used in krylov process)
Defines some simple macros to simplify GPU indexing.
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with error reached.
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
__device__ int integrator_steps[DIVERGENCE_TEST]
If DIVERGENCE_TEST is defined, this creates a device array for tracking.
#define GRID_DIM
The total number of threads in the Grid, provides an offset between vector entries.
Header definitions for solver initialization routins.
#define STRIDE
the matrix dimensions
Implementation of the GPU arnoldi iteration methods.
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Contains header definitions for the CUDA RHS function for the van der Pol example.
Various macros controlling behaviour of RB43 algorithm.
__device__ void matvec_n_by_m_scale_special2(const int m, const double *__restrict__ scale, const double *__restrict__ A, double *const __restrict__ *V, double *__restrict__ *Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
Header definition of CUDA Finite Difference Jacobian.
#define EC_success
Successful time step.
__device__ void matvec_n_by_m_scale_add(const int m, const double scale, const double *__restrict__ A, const double *__restrict__ V, double *__restrict__ Av, const double *__restrict__ add)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
void sparse_multiplier(const double *A, const double *Vm, double *w)
Implements Jacobian \ vector multiplication in sparse (or unrolled) form.
Definitions of various linear algebra functions needed in the exponential integrators.
simple convenience file to include the correct solver properties file
__device__ void scale(const double *__restrict__ y0, const double *__restrict__ y1, double *__restrict__ sc)
Get scaling for weighted norm.
#define MAX_CONSECUTIVE_ERRORS
Number of consecutive errors on internal integration steps allowed before exit.
__device__ void matvec_m_by_m_plusequal(const int m, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector plus equals for a matrix of size MxM and vector of size Mx1. That is, it returns (A + I) * v.
__device__ int arnoldi(const double scale, const int p, const double h, const double *__restrict__ A, const solver_memory *__restrict__ solver, const double *__restrict__ v, double *__restrict__ beta, double *__restrict__ work, cuDoubleComplex *__restrict__ work2)
Runs the arnoldi iteration to calculate the Krylov projection.
#define ORD
order of embedded methods
A file generated by Scons that specifies various options to the solvers.
__device__ void matvec_n_by_m_scale_special(const int m, const double *__restrict__ scale, const double *__restrict__ A, double *const __restrict__ *V, double *__restrict__ *Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
__device__ double sc_norm(const double *__restrict__ nums, const double *__restrict__ sc)
Perform weighted norm.
__device__ void matvec_n_by_m_scale(const int m, const double scale, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating point math.
Contains a header definition for the CUDA van der Pol Jacobian evaluation.