accelerInt  v0.1
rkc.c
Go to the documentation of this file.
1 
22 #include "rkc.h"
23 #include "dydt.h"
24 #include "solver_options.h"
25 
26 #ifdef GENERATE_DOCS
27 namespace rkc {
28 #endif
29 
41 Real rkc_spec_rad (const Real t, const Real pr, const Real hmax, const Real* y,
42  const Real* F, Real* v, Real* Fv) {
43 
44  const int itmax = 50;
45  Real small = ONE / hmax;
46 
47  Real nrm1 = ZERO;
48  Real nrm2 = ZERO;
49  for (int i = 0; i < NSP; ++i) {
50  nrm1 += (y[i] * y[i]);
51  nrm2 += (v[i] * v[i]);
52  }
53  nrm1 = sqrt(nrm1);
54  nrm2 = sqrt(nrm2);
55 
56  Real dynrm;
57  if ((nrm1 != ZERO) && (nrm2 != ZERO)) {
58  dynrm = nrm1 * sqrt(UROUND);
59  for (int i = 0; i < NSP; ++i) {
60  v[i] = y[i] + v[i] * (dynrm / nrm2);
61  }
62  } else if (nrm1 != ZERO) {
63  dynrm = nrm1 * sqrt(UROUND);
64  for (int i = 0; i < NSP; ++i) {
65  v[i] = y[i] * (ONE + sqrt(UROUND));
66  }
67  } else if (nrm2 != ZERO) {
68  dynrm = UROUND;
69  for (int i = 0; i < NSP; ++i) {
70  v[i] *= (dynrm / nrm2);
71  }
72  } else {
73  dynrm = UROUND;
74  for (int i = 0; i < NSP; ++i) {
75  v[i] = UROUND;
76  }
77  }
78 
79  // now iterate using nonlinear power method
80  Real sigma = ZERO;
81  for (int iter = 1; iter <= itmax; ++iter) {
82 
83  dydt (t, pr, v, Fv);
84 
85  nrm1 = ZERO;
86  for (int i = 0; i < NSP; ++i) {
87  nrm1 += ((Fv[i] - F[i]) * (Fv[i] - F[i]));
88  }
89  nrm1 = sqrt(nrm1);
90  nrm2 = sigma;
91  sigma = nrm1 / dynrm;
92  if ((iter >= 2) && (fabs(sigma - nrm2) <= (fmax(sigma, small) * P01))) {
93  for (int i = 0; i < NSP; ++i) {
94  v[i] = v[i] - y[i];
95  }
96  return (ONEP2 * sigma);
97  }
98 
99  if (nrm1 != ZERO) {
100  for (int i = 0; i < NSP; ++i) {
101  v[i] = y[i] + ((Fv[i] - F[i]) * (dynrm / nrm1));
102  }
103  } else {
104  int ind = (iter % NSP);
105  v[ind] = y[ind] - (v[ind] - y[ind]);
106  }
107 
108  }
109  return (ONEP2 * sigma);
110 }
111 
113 
125 void rkc_step (const Real t, const Real pr, const Real h, const Real* y_0, const Real* F_0,
126  const int s, Real* y_j) {
127 
128  const Real w0 = ONE + TWO / (13.0 * (Real)(s * s));
129  Real temp1 = (w0 * w0) - ONE;
130  Real temp2 = sqrt(temp1);
131  Real arg = (Real)(s) * log(w0 + temp2);
132  const Real w1 = sinh(arg) * temp1 / (cosh(arg) * (Real)(s) * temp2 - w0 * sinh(arg));
133 
134  Real b_jm1 = ONE / (FOUR * (w0 * w0));
135  Real b_jm2 = b_jm1;
136 
137  Real y_jm1[NSP];
138  Real y_jm2[NSP];
139 
140  // calculate y_1
141  Real mu_t = w1 * b_jm1;
142  for (int i = 0; i < NSP; ++i) {
143  y_jm2[i] = y_0[i];
144  y_jm1[i] = y_0[i] + (mu_t * h * F_0[i]);
145  }
146 
147  Real c_jm2 = ZERO;
148  Real c_jm1 = mu_t;
149  Real zjm1 = w0;
150  Real zjm2 = ONE;
151  Real dzjm1 = ONE;
152  Real dzjm2 = ZERO;
153  Real d2zjm1 = ZERO;
154  Real d2zjm2 = ZERO;
155 
156  for (int j = 2; j <= s; ++j) {
157 
158  Real zj = TWO * w0 * zjm1 - zjm2;
159  Real dzj = TWO * w0 * dzjm1 - dzjm2 + TWO * zjm1;
160  Real d2zj = TWO * w0 * d2zjm1 - d2zjm2 + FOUR * dzjm1;
161  Real b_j = d2zj / (dzj * dzj);
162  Real gamma_t = ONE - (zjm1 * b_jm1);
163 
164  Real nu = -b_j / b_jm2;
165  Real mu = TWO * b_j * w0 / b_jm1;
166  mu_t = mu * w1 / w0;
167 
168  // calculate derivative, use y array for temporary storage
169  dydt (t + (h * c_jm1), pr, y_jm1, y_j);
170 
171  for (int i = 0; i < NSP; ++i) {
172  y_j[i] = (ONE - mu - nu) * y_0[i] + (mu * y_jm1[i]) + (nu * y_jm2[i])
173  + h * mu_t * (y_j[i] - (gamma_t * F_0[i]));
174  }
175  Real c_j = (mu * c_jm1) + (nu * c_jm2) + mu_t * (ONE - gamma_t);
176 
177  if (j < s) {
178  for (int i = 0; i < NSP; ++i) {
179  y_jm2[i] = y_jm1[i];
180  y_jm1[i] = y_j[i];
181  }
182  }
183 
184  c_jm2 = c_jm1;
185  c_jm1 = c_j;
186  b_jm2 = b_jm1;
187  b_jm1 = b_j;
188  zjm2 = zjm1;
189  zjm1 = zj;
190  dzjm2 = dzjm1;
191  dzjm1 = dzj;
192  d2zjm2 = d2zjm1;
193  d2zjm1 = d2zj;
194  }
195 
196 } // rkc_step
197 
199 
209 int integrate (Real t, const Real tEnd, const Real pr, Real* y) {
210 
211  int nstep = 0;
212  Real work[4 + NSP] = {0};
213 
214  int m_max = (int)(round(sqrt(RTOL / (10.0 * UROUND))));
215 
216  if (m_max < 2) {
217  m_max = 2;
218  }
219 
220  Real y_n[NSP];
221  for (int i = 0; i < NSP; ++i) {
222  y_n[i] = y[i];
223  }
224 
225  // calculate F_n for initial y
226  Real F_n[NSP];
227  dydt (t, pr, y_n, F_n);
228 
229  // load initial estimate for eigenvector
230  if (work[2] < UROUND) {
231  for (int i = 0; i < NSP; ++i) {
232  work[4 + i] = F_n[i];
233  }
234  }
235 
236  const Real hmax = fabs(tEnd - t);
237  Real hmin = TEN * UROUND * fmax(fabs(t), hmax);
238 
239  while (t < tEnd) {
240  // use time step stored in work[2]
241 
242  Real temp_arr[NSP];
243  Real temp_arr2[NSP];
244  Real err;
245 
246  // estimate Jacobian spectral radius
247  // only if 25 steps passed
248  if ((nstep % 25) == 0) {
249  work[3] = rkc_spec_rad (t, pr, hmax, y_n, F_n, &work[4], temp_arr2);
250  }
251 
252  // first step, estimate step size
253  if (work[2] < UROUND) {
254  work[2] = hmax;
255  if ((work[3] * work[2]) > ONE) {
256  work[2] = ONE / work[3];
257  }
258  work[2] = fmax(work[2], hmin);
259 
260  for (int i = 0; i < NSP; ++i) {
261  temp_arr[i] = y_n[i] + (work[2] * F_n[i]);
262  }
263  dydt (t + work[2], pr, temp_arr, temp_arr2);
264 
265  err = ZERO;
266  for (int i = 0; i < NSP; ++i) {
267  Real est = (temp_arr2[i] - F_n[i]) / (ATOL + RTOL * fabs(y_n[i]));
268  err += est * est;
269  }
270  err = work[2] * sqrt(err / NSP);
271 
272  if ((P1 * work[2]) < (hmax * sqrt(err))) {
273  work[2] = fmax(P1 * work[2] / sqrt(err), hmin);
274  } else {
275  work[2] = hmax;
276  }
277  }
278 
279  // check if last step
280  if ((ONEP1 * work[2]) >= fabs(tEnd - t)) {
281  work[2] = fabs(tEnd - t);
282  }
283 
284  // calculate number of steps
285  int m = 1 + (int)(sqrt(ONEP54 * work[2] * work[3] + ONE));
286 
287  if (m > m_max) {
288  m = m_max;
289  work[2] = (Real)(m * m - 1) / (ONEP54 * work[3]);
290  }
291 
292  hmin = TEN * UROUND * fmax(fabs(t), fabs(t + work[2]));
293 
294  // perform tentative time step
295  rkc_step (t, pr, work[2], y_n, F_n, m, y);
296 
297  // calculate F_np1 with tenative y_np1
298  dydt (t + work[2], pr, y, temp_arr);
299 
300  // estimate error
301  err = ZERO;
302  for (int i = 0; i < NSP; ++i) {
303  Real est = P8 * (y_n[i] - y[i]) + P4 * work[2] * (F_n[i] + temp_arr[i]);
304  est /= (ATOL + RTOL * fmax(fabs(y[i]), fabs(y_n[i])));
305  err += est * est;
306  }
307  err = sqrt(err / ((Real)NSP));
308 
309  if (err > ONE) {
310  // error too large, step is rejected
311 
312  // select smaller step size
313  work[2] = P8 * work[2] / (pow(err, ONE3RD));
314 
315  // reevaluate spectral radius
316  work[3] = rkc_spec_rad (t, pr, hmax, y_n, F_n, &work[4], temp_arr2);
317  } else {
318  // step accepted
319  t += work[2];
320  nstep++;
321 
322  Real fac = TEN;
323  Real temp1, temp2;
324 
325  if (work[1] < UROUND) {
326  temp2 = pow(err, ONE3RD);
327  if (P8 < (fac * temp2)) {
328  fac = P8 / temp2;
329  }
330  } else {
331  temp1 = P8 * work[2] * pow(work[0], ONE3RD);
332  temp2 = work[1] * pow(err, TWO3RD);
333  if (temp1 < (fac * temp2)) {
334  fac = temp1 / temp2;
335  }
336  }
337 
338  // set "old" values to those for current time step
339  work[0] = err;
340  work[1] = work[2];
341 
342  for (int i = 0; i < NSP; ++i) {
343  y_n[i] = y[i];
344  F_n[i] = temp_arr[i];
345  }
346 
347  // store next time step
348  work[2] *= fmax(P1, fac);
349  work[2] = fmax(hmin, fmin(hmax, work[2]));
350 
351  /* currently not supported
352  if (task == 0) {
353  // only perform one step
354  return;
355  }*/
356  }
357 
358  }
359 
360  return EC_success;
361 
362 } // rkc_driver
Contains header definitions for the RHS function for the van der Pol example.
#define TWO
Definition: rkc_props.cuh:26
Definition: rkc_init.c:11
#define TEN
Definition: rkc_props.cuh:30
#define ONEP1
Definition: rkc_props.cuh:31
#define UROUND
Definition: rkc_props.cuh:40
#define ONEP54
Definition: rkc_props.cuh:33
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
Definition: dydt.c:22
#define ZERO
Definition: rkc_props.cuh:24
#define TWO3RD
Definition: rkc_props.cuh:39
#define P4
Definition: rkc_props.cuh:35
#define NSP
The IVP system size.
Definition: header.cuh:20
#define ONEP2
Definition: rkc_props.cuh:32
A file generated by Scons that specifies various options to the solvers.
__device__ Real rkc_spec_rad(const Real t, const Real pr, const Real hmax, const Real *y, const Real *F, Real *v, Real *Fv, mechanism_memory const *const __restrict__ mech)
Definition: rkc.cu:13
#define RTOL
#define EC_success
Successful time step.
#define ONE
Definition: rkc_props.cuh:25
#define ATOL
__device__ void integrate(const double, const double, const double, double *const __restrict__, mechanism_memory const *const __restrict__, solver_memory const *const __restrict__)
#define P1
Definition: rkc_props.cuh:36
#define Real
Definition: rkc_props.cuh:22
#define P01
Definition: rkc_props.cuh:37
__device__ void rkc_step(const Real t, const Real pr, const Real h, const Real *y_0, const Real *F_0, const int s, Real *y_j, Real *y_jm1, Real *y_jm2, mechanism_memory const *const __restrict__ mech)
Definition: rkc.cu:104
#define FOUR
Definition: rkc_props.cuh:28
#define P8
Definition: rkc_props.cuh:34
#define ONE3RD
Definition: rkc_props.cuh:38