accelerInt  v0.1
fd_jacob.cu
Go to the documentation of this file.
1 
6 #include "fd_jacob.cuh"
7 
9 #define FD_ORD 1
10 
11 // Finite difference coefficients
12 #if FD_ORD == 2
13  __constant__ double x_coeffs[FD_ORD] = {-1.0, 1.0};
14  __constant__ double y_coeffs[FD_ORD] = {-0.5, 0.5};
15 #elif FD_ORD == 4
16  __constant__ double x_coeffs[FD_ORD] = {-2.0, -1.0, 1.0, 2.0};
17  __constant__ double y_coeffs[FD_ORD] = {1.0 / 12.0, -2.0 / 3.0, 2.0 / 3.0, -1.0 / 12.0};
18 #elif FD_ORD == 6
19  __constant__ double x_coeffs[FD_ORD] = {-3.0, -2.0, - 1.0, 1.0, 2.0, 3.0};
20  __constant__ double y_coeffs[FD_ORD] = {-1.0 / 60.0, 3.0 / 20.0, -3.0 / 4.0, 3.0 / 4.0, -3.0 / 20.0, 1.0 / 60.0};
21 #endif
22 
34 __device__
35 void eval_jacob (const double t, const double pres, const double * __restrict__ cy,
36  double * __restrict__ jac, const mechanism_memory* __restrict__ d_mem,
37  double* __restrict__ y_temp, double* __restrict__ ewt) {
38  double* dy = d_mem->dy;
39 
40  #pragma unroll
41  for (int i = 0; i < NSP; ++i) {
42  y_temp[INDEX(i)] = cy[INDEX(i)];
43  ewt[INDEX(i)] = ATOL + (RTOL * fabs(cy[INDEX(i)]));
44  }
45 
46  dydt (t, pres, cy, dy, d_mem);
47  #if FD_ORD == 1
48  #pragma unroll
49  for (int j = 0; j < NSP; ++j) {
50  #pragma unroll
51  for (int i = 0; i < NSP; ++i) {
52  jac[INDEX(i + NSP*j)] = dy[INDEX(i)];
53  }
54  }
55  #endif
56 
57  // unit roundoff of machine
58  double srur = sqrt(DBL_EPSILON);
59 
60  double sum = 0.0;
61  #pragma unroll
62  for (int i = 0; i < NSP; ++i) {
63  sum += (ewt[INDEX(i)] * dy[INDEX(i)]) * (ewt[INDEX(i)] * dy[INDEX(i)]);
64  }
65  double fac = sqrt(sum / ((double)(NSP)));
66  double r0 = 1000.0 * RTOL * DBL_EPSILON * ((double)(NSP)) * fac;
67 
68 
69  #pragma unroll
70  for (int j = 0; j < NSP; ++j) {
71  double yj_orig = y_temp[INDEX(j)];
72  double r = fmax(srur * fabs(yj_orig), r0 / ewt[INDEX(j)]);
73 
74  #if FD_ORD == 1
75  y_temp[INDEX(j)] = yj_orig + r;
76  dydt (t, pres, y_temp, dy, d_mem);
77 
78  #pragma unroll
79  for (int i = 0; i < NSP; ++i) {
80  jac[INDEX(i + NSP*j)] = (dy[INDEX(i)] - jac[INDEX(i + NSP*j)]) / r;
81  }
82  #else
83  #pragma unroll
84  for (int i = 0; i < NSP; ++i) {
85  jac[INDEX(i + NSP*j)] = 0.0;
86  }
87  #pragma unroll
88  for (int k = 0; k < FD_ORD; ++k) {
89  y_temp[INDEX(j)] = yj_orig + x_coeffs[k] * r;
90  dydt (t, pres, y_temp, dy, d_mem);
91 
92  #pragma unroll
93  for (int i = 0; i < NSP; ++i) {
94  jac[INDEX(i + NSP*j)] += y_coeffs[k] * y_temp[INDEX(i)];
95  }
96  }
97  #pragma unroll
98  for (int i = 0; i < NSP; ++i) {
99  jac[INDEX(i + NSP*j)] /= r;
100  }
101  #endif
102 
103  y_temp[INDEX(j)] = yj_orig;
104  }
105 
106 }
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
Definition: dydt.c:22
#define NSP
The IVP system size.
Definition: header.cuh:20
Header definition of CUDA Finite Difference Jacobian.
#define RTOL
__device__ void eval_jacob(const double t, const double pres, const double *__restrict__ cy, double *__restrict__ jac, const mechanism_memory *__restrict__ d_mem, double *__restrict__ y_temp, double *__restrict__ ewt)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
Definition: fd_jacob.cu:35
#define ATOL
double * y_temp
temorary storage
#define FD_ORD
The finite difference order [Default: 1].
Definition: fd_jacob.cu:9
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
Definition: gpu_macros.cuh:24