Excerpt from the EXP4 integrator source; elided regions are marked with `// ...`:

```cuda
#include <cuComplex.h>
// ...
#ifndef FINITE_DIFFERENCE
// ...
#endif
// ...
#ifdef LOG_KRYLOV_AND_STEPSIZES
    // optional per-step logging of errors, times, step sizes and rejections
    extern __device__ double err_log[MAX_STEPS];
    // ...
    extern __device__ double t_log[MAX_STEPS];
    extern __device__ double h_log[MAX_STEPS];
    extern __device__ bool reject_log[MAX_STEPS];
    extern __device__ int num_integrator_steps;
#endif
#ifdef DIVERGENCE_TEST
// ...
#endif

__device__ void integrate (const double t_start, const double t_end,
                const double pr, double * __restrict__ y,
                const mechanism_memory * __restrict__ mech,
                const solver_memory * __restrict__ solver)
{
    // ...
#ifdef CONST_TIME_STEP
    // fixed stepping: take the whole interval in one step
    double h = t_end - t_start;
#else
    // adaptive stepping: start from a small trial step size
    double h = fmin(1.0e-8, t_end - t_start);
#endif
    // ...
    double * const __restrict__ sc = solver->sc;
    double * const __restrict__ work1 = solver->work1;
    double * const __restrict__ work2 = solver->work2;
    double * const __restrict__ y1 = solver->work3;
    cuDoubleComplex * const __restrict__ work4 = solver->work4;
    double * const __restrict__ fy = mech->dy;
    double * const __restrict__ A = mech->jac;
    double * const __restrict__ Hm = solver->Hm;
    double * const __restrict__ Vm = solver->Vm;
    double * const __restrict__ phiHm = solver->phiHm;
    double * const __restrict__ k1 = solver->k1;
    double * const __restrict__ k2 = solver->k2;
    double * const __restrict__ k3 = solver->k3;
    double * const __restrict__ k4 = solver->k4;
    double * const __restrict__ k5 = solver->k5;
    double * const __restrict__ k6 = solver->k6;
    double * const __restrict__ k7 = solver->k7;
    int * const __restrict__ result = solver->result;
    // ...
    // evaluate the RHS: fy = f(t, y)
    dydt (t, pr, y, fy, mech);
#ifdef FINITE_DIFFERENCE
    // ...
#endif
#ifdef DIVERGENCE_TEST
    // ...
#endif
    // first Krylov projection, on fy
    int m = arnoldi(1.0 / 3.0, P, h, A, solver, fy, &beta, work1, work4);
```
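Each `arnoldi()` call builds the Krylov quantities (`Vm`, `Hm`, `phiHm`, and the scalar `beta`) that the EXP4 stages consume. As a sketch in standard exponential-Krylov form, and matching the `1.0 / 3.0` scale argument (EXP4 evaluates its phi function at one third of the step), the approximation is:

```latex
\varphi\!\left(\tfrac{h}{3}\,A\right) v
  \;\approx\; \beta\, V_m\, \varphi\!\left(\tfrac{h}{3}\,H_m\right) e_1,
\qquad \beta = \lVert v \rVert_2,
```

where Vm and Hm are the orthonormal basis and Hessenberg matrix produced by m Arnoldi steps on A and v.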
```cuda
    // ...
    // right-hand side for the fourth stage: combination of the first three k's
    for (int i = 0; i < NSP; ++i) {
        work2[INDEX(i)] = h * ((-7.0 / 300.0) * k1[INDEX(i)]
                             + (97.0 / 150.0) * k2[INDEX(i)]
                             - (37.0 / 300.0) * k3[INDEX(i)]);
    }
    // ...
    dydt (t, pr, k4, work1, mech);
    // ...
    for (int i = 0; i < NSP; ++i) {
        // ...
    }
    // second Krylov projection, on k4
    int m1 = arnoldi(1.0 / 3.0, P, h, A, solver, k4, &beta, work1, work4);
    // bail out if the subspace no longer fits the allotted storage
    if (m1 + P >= STRIDE || m1 < 0)
        // ...
    // ...
    // right-hand side for the final stage: combination of all six k's
    for (int i = 0; i < NSP; ++i) {
        work2[INDEX(i)] = h * ((59.0 / 300.0) * k1[INDEX(i)]
                             - (7.0 / 75.0) * k2[INDEX(i)]
                             + (269.0 / 300.0) * k3[INDEX(i)]
                             + (2.0 / 3.0) * (k4[INDEX(i)] + k5[INDEX(i)] + k6[INDEX(i)]));
    }
    // ...
    dydt (t, pr, k7, work1, mech);
    // ...
    for (int i = 0; i < NSP; ++i) {
        // ...
    }
    // third Krylov projection, on k7
    int m2 = arnoldi(1.0 / 3.0, P, h, A, solver, k7, &beta, work1, work4);
    if (m2 + P >= STRIDE || m2 < 0)
        // ...
    // ...
    for (int i = 0; i < NSP; ++i) {
        // ...
    }
    // componentwise tolerance scaling from the old and new states
    scale (y, y1, work2);
    // ...
    for (int i = 0; i < NSP; ++i) {
        // ...
    }
    // weighted error estimate of the embedded solution
    err = h * sc_norm(work1, work2);
    // ...
    for (int i = 0; i < NSP; ++i) {
        // ...
    }
    // take the smaller of the two error estimates, floored at EPS
    err = fmax(EPS, fmin(err, h * sc_norm(work1, work2)));
    // classical step-size exponent
    h_new = pow(err, -1.0 / ORD);
#ifndef CONST_TIME_STEP
    // ...
    for (int i = 0; i < NSP; ++i)
        // ...
    // Gustafsson-style predictive limiter using the previous step and error
    h_new = fmin(h_new, (h / h_old) * pow((err_old / (err * err)), (1.0 / ORD)));
    // safety factor 0.9, growth bounded to [0.2, 8.0]
    h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
    // ...
    err_old = fmax(1.0e-2, err);
    // ...
    h_new = fmin(h, h_new);
    // ...
    h = fmin(h_new, t_end - t);
    // ...
    // the retry step size is clamped the same way, then capped at t_end
    h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
    h_new = fmin(h_new, t_end - t);
#endif
```
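Reading these updates together, the step-size controller is the standard error-per-step rule with a Gustafsson-style predictive limiter (a reconstruction of the code above, with ORD the embedded-method order):

```latex
h_{\mathrm{new}} \;=\; h \cdot \max\!\Big(\min\!\big(0.9\,\mathrm{err}^{-1/\mathrm{ORD}},\; 8\big),\; 0.2\Big),
\qquad
\mathrm{err}^{-1/\mathrm{ORD}} \;\leftarrow\; \min\!\left(\mathrm{err}^{-1/\mathrm{ORD}},\;
    \frac{h}{h_{\mathrm{old}}}\left(\frac{\mathrm{err}_{\mathrm{old}}}{\mathrm{err}^{2}}\right)^{1/\mathrm{ORD}}\right).
```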
```cuda
    // ...
    for (int i = 0; i < NSP; ++i)
        // ...
}
```
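Referenced macros, functions, and variables: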
#define T_ID
The global CUDA thread index.
__device__ void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Get scaling for weighted norm for the initial timestep (used in the Krylov process).
Defines some simple macros to simplify GPU indexing.
#define P
max order of the phi functions (for error estimation)
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with errors reached.
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
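For reference, the van der Pol right-hand side is simple enough to sketch directly against the documented signature (the plain `dy[0]`/`dy[1]` subscripts are a simplification; the GPU build presumably goes through the `INDEX()` macro):

```cuda
void dydt (const double t, const double mu,
           const double * __restrict__ y, double * __restrict__ dy)
{
    (void)t;  // the van der Pol system is autonomous; t is unused
    dy[0] = y[1];                                    // y0' = y1
    dy[1] = mu * (1.0 - y[0] * y[0]) * y[1] - y[0];  // y1' = mu*(1 - y0^2)*y1 - y0
}
```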
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
__device__ int integrator_steps[DIVERGENCE_TEST]
If DIVERGENCE_TEST is defined, this creates a device array for tracking the number of integrator steps taken by each thread.
#define GRID_DIM
The total number of threads in the Grid, provides an offset between vector entries.
Header definitions for solver initialization routines.
#define STRIDE
The matrix dimensions.
Implementation of the GPU Arnoldi iteration methods.
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Contains header definitions for the CUDA RHS function for the van der Pol example.
Structure containing memory needed for EXP4 algorithm.
Header definition of CUDA Finite Difference Jacobian.
#define EC_success
Successful time step.
__device__ void matvec_n_by_m_scale_add(const int m, const double scale, const double *__restrict__ A, const double *__restrict__ V, double *__restrict__ Av, const double *__restrict__ add)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1, scaled by a specified factor and added to another vector.
void sparse_multiplier(const double *A, const double *Vm, double *w)
Implements Jacobian-vector multiplication in sparse (or unrolled) form.
Definitions of various linear algebra functions needed in the exponential integrators.
Simple convenience file to include the correct solver properties file.
__device__ void matvec_n_by_m_scale_add_subtract(const int m, const double scale, const double *__restrict__ A, const double *V, double *__restrict__ Av, const double *__restrict__ add, const double *__restrict__ sub)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1, scaled by a specified factor, with one vector added and another subtracted.
__device__ void scale(const double *__restrict__ y0, const double *__restrict__ y1, double *__restrict__ sc)
Get scaling for weighted norm.
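A plausible shape for this scaling, assuming the conventional Hairer/Wanner form with absolute and relative tolerance constants (`ATOL` and `RTOL` are assumed names, not confirmed by this page):

```cuda
__device__ void scale (const double * __restrict__ y0,
                       const double * __restrict__ y1,
                       double * __restrict__ sc)
{
    for (int i = 0; i < NSP; ++i)
    {
        // weight each component by the larger magnitude of old and new state
        sc[INDEX(i)] = ATOL + RTOL * fmax(fabs(y0[INDEX(i)]), fabs(y1[INDEX(i)]));
    }
}
```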
#define MAX_CONSECUTIVE_ERRORS
Number of consecutive errors on internal integration steps allowed before exit.
__device__ void matvec_m_by_m(const int m, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized MxM and a vector Mx1.
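A minimal sketch of this product, assuming column-major storage with leading dimension `STRIDE` and the strided `INDEX()` access used elsewhere on this page:

```cuda
__device__ void matvec_m_by_m (const int m,
                               const double * const __restrict__ A,
                               const double * const __restrict__ V,
                               double * const __restrict__ Av)
{
    for (int i = 0; i < m; ++i)
    {
        Av[INDEX(i)] = 0.0;
        for (int j = 0; j < m; ++j)
        {
            // column-major: element (i, j) of A sits at offset j * STRIDE + i
            Av[INDEX(i)] += A[INDEX(j * STRIDE + i)] * V[INDEX(j)];
        }
    }
}
```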
__device__ int arnoldi(const double scale, const int p, const double h, const double *__restrict__ A, const solver_memory *__restrict__ solver, const double *__restrict__ v, double *__restrict__ beta, double *__restrict__ work, cuDoubleComplex *__restrict__ work2)
Runs the arnoldi iteration to calculate the Krylov projection.
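The per-thread structure of that iteration is the classical Arnoldi process with modified Gram-Schmidt. Below is a self-contained serial sketch on a dense column-major matrix; it omits the GPU striding, breakdown handling, and the adaptive convergence test on the phi functions of `Hm` that the real routine performs:

```cuda
#include <math.h>

// Builds beta = ||v||, an orthonormal basis Vm (n x (m+1), column-major) and
// a Hessenberg matrix Hm ((m+1) x m, column-major): a sketch of the Krylov
// projection, not the library routine itself.
static void arnoldi_sketch (const int n, const int m, const double * A,
                            const double * v, double * Vm, double * Hm,
                            double * beta)
{
    double nrm = 0.0;
    for (int i = 0; i < n; ++i) nrm += v[i] * v[i];
    *beta = sqrt(nrm);
    for (int i = 0; i < n; ++i) Vm[i] = v[i] / *beta;   // Vm[:,0] = v / beta

    for (int j = 0; j < m; ++j)
    {
        double * w = &Vm[(j + 1) * n];
        // w = A * Vm[:,j]
        for (int i = 0; i < n; ++i)
        {
            w[i] = 0.0;
            for (int k = 0; k < n; ++k)
                w[i] += A[k * n + i] * Vm[j * n + k];
        }
        // modified Gram-Schmidt against the previous basis vectors
        for (int i = 0; i <= j; ++i)
        {
            double hij = 0.0;
            for (int k = 0; k < n; ++k) hij += w[k] * Vm[i * n + k];
            Hm[j * (m + 1) + i] = hij;
            for (int k = 0; k < n; ++k) w[k] -= hij * Vm[i * n + k];
        }
        // normalize w into the next basis vector
        double hnext = 0.0;
        for (int k = 0; k < n; ++k) hnext += w[k] * w[k];
        hnext = sqrt(hnext);
        Hm[j * (m + 1) + (j + 1)] = hnext;
        for (int k = 0; k < n; ++k) w[k] /= hnext;
    }
}
```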
__device__ void integrate(const double t_start, const double t_end, const double pr, double *__restrict__ y, const mechanism_memory *__restrict__ mech, const solver_memory *__restrict__ solver)
4th-order exponential integrator function with adaptive Krylov subspace approximation.
#define ORD
order of embedded methods
A file generated by Scons that specifies various options to the solvers.
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
__device__ double sc_norm(const double *__restrict__ nums, const double *__restrict__ sc)
Perform weighted norm.
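This is presumably the usual weighted RMS norm over the scaled components (the averaging over `NSP` is an assumption; some codes omit it):

```cuda
__device__ double sc_norm (const double * __restrict__ nums,
                           const double * __restrict__ sc)
{
    double norm = 0.0;
    for (int i = 0; i < NSP; ++i)
    {
        const double ratio = nums[INDEX(i)] / sc[INDEX(i)];
        norm += ratio * ratio;
    }
    return sqrt(norm / ((double)NSP));
}
```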
__device__ void matvec_n_by_m_scale(const int m, const double scale, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
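Spelled out from the description above (the exact parenthesization is an assumption):

```cuda
// value of a strided vector at logical index i for the current thread
#define INDEX(i) ((i) * GRID_DIM + T_ID)
```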
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating-point math.
Contains a header definition for the CUDA van der Pol Jacobian evaluation.