accelerInt  v0.1
arnoldi.cuh
/*!
 * \file arnoldi.cuh
 * \brief Implementation of the Arnoldi iteration used to build the Krylov-subspace
 *        projection for the exponential integrators.
 */

#ifndef ARNOLDI_CUH
#define ARNOLDI_CUH

#include <string.h>

#include "header.cuh"
#include "phiAHessenberg.cuh"
#include "exponential_linear_algebra.cuh"
#include "sparse_multiplier.cuh"
#include "solver_options.cuh"
#include "solver_props.cuh"

//#define EXACT_KRYLOV

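//! The list of indices at which to check the Krylov projection error.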
__constant__ int index_list[23] = {1, 2, 3, 4, 5, 6, 7, 9, 11, 14, 17, 21, 27, 34, 42, 53, 67, 84, 106, 133, 167, 211, 265};

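/*!
 * \brief Runs the Arnoldi iteration to calculate the Krylov projection.
 *
 * (Parameter descriptions below are inferred from the code and may differ
 *  from the original documentation.)
 *
 * \param[in]   scale   scaling factor applied to the timestep h in the phi evaluation
 * \param[in]   p       the number of extra columns used to augment the Hessenberg
 *                      matrix, set by the highest-order phi function required
 * \param[in]   h       the timestep
 * \param[in]   A       the (sparse) system Jacobian
 * \param[in]   solver  the solver_memory object supplying sc, Vm, Hm and phiHm
 * \param[in]   v       the vector seeding the Krylov subspace
 * \param[out]  beta    the two-norm of v
 * \param       work    a double working vector of length NSP
 * \param       work2   a complex working array used in the phi evaluation
 * \returns     the size of the resulting subspace; j with j + p >= STRIDE if the
 *              maximum subspace size was exhausted, or -info on an error in the
 *              phi-function evaluation
 */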
__device__
int arnoldi(const double scale,
            const int p, const double h,
            const double* __restrict__ A,
            const solver_memory* __restrict__ solver,
            const double* __restrict__ v, double* __restrict__ beta,
            double* __restrict__ work,
            cuDoubleComplex* __restrict__ work2)
{
    const double* __restrict__ sc = solver->sc;
    double* __restrict__ Vm = solver->Vm;
    double* __restrict__ Hm = solver->Hm;
    double* __restrict__ phiHm = solver->phiHm;

    //normalize the seed vector v (e.g. A * fy) and store it as the first column of Vm
    *beta = normalize(v, Vm);
66 
67  double store = 0;
68  int index = 0;
69  int j = 0;
70  double err = 2.0;
71  int info = 0;
72 
    while (err >= 1.0)
    {
        for (; j < index_list[index] && j + p < STRIDE; j++)
        {
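            //form the next candidate basis vector: work = A * Vm(:, j)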
            sparse_multiplier(A, &Vm[GRID_DIM * (j * NSP)], work);
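            //orthogonalize against the previous basis vectors (modified Gram-Schmidt)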
            for (int i = 0; i <= j; i++)
            {
                Hm[INDEX(j * STRIDE + i)] = dotproduct(work, &Vm[GRID_DIM * (i * NSP)]);
                scale_subtract(Hm[INDEX(j * STRIDE + i)], &Vm[GRID_DIM * (i * NSP)], work);
            }
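            //the norm of the orthogonalized vector becomes the subdiagonal entry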
            Hm[INDEX(j * STRIDE + j + 1)] = two_norm(work);
            if (fabs(Hm[INDEX(j * STRIDE + j + 1)]) < ATOL)
            {
                //happy breakdown
                j++;
                break;
            }
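            //normalize and store the next basis vector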
            scale_mult(1.0 / Hm[INDEX(j * STRIDE + j + 1)], work, &Vm[GRID_DIM * ((j + 1) * NSP)]);
        }
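        //the subspace cannot grow past STRIDE - p: fail out
        //(or clamp, for a constant time-step solver)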
#ifndef CONST_TIME_STEP
        if (j + p >= STRIDE)
            return j;
#else
        if (j + p >= STRIDE)
            j = STRIDE - p - 1;
#endif
        index++;
        //resize Hm to be m x m, and store Hm(m, m + 1) for later
        store = Hm[INDEX((j - 1) * STRIDE + j)];
        Hm[INDEX((j - 1) * STRIDE + j)] = 0.0;

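        //augment Hm with unit columns so that the matrix exponential of the
        //augmented matrix yields the required phi functions applied to Hm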
        //0. fill potentially non-empty memory first
        for (int i = 0; i < (j + 2); ++i)
            Hm[INDEX(j * STRIDE + i)] = 0;

        //get error
        //1. Construct augmented Hm (fill in identity matrix)
        Hm[INDEX((j) * STRIDE)] = 1.0;
        #pragma unroll
        for (int i = 1; i < p; i++)
        {
            //0. fill potentially non-empty memory first
            for (int k = 0; k < (j + i + 2); ++k)
                Hm[INDEX((j + i) * STRIDE + k)] = 0;
            //1. Construct augmented Hm (fill in identity matrix)
            Hm[INDEX((j + i) * STRIDE + (j + i - 1))] = 1.0;
        }

#ifdef RB43
        //2. Get phiHm
        info = expAc_variable (j + p, Hm, h * scale, phiHm, solver, work2);
#elif defined(EXP4)
        //2. Get phiHm
        info = phiAc_variable (j + p, Hm, h * scale, phiHm, solver, work2);
#endif
        if (info != 0)
            return -info;

        //3. Get error
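        //a posteriori estimate of the Krylov projection error:
        //h * beta * |Hm(m + 1, m)| * |phiHm(m, m + 1)| * ||Vm(:, m + 1)||_sc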
        err = h * (*beta) * fabs(store * phiHm[INDEX((j) * STRIDE + (j) - 1)]) * sc_norm(&Vm[GRID_DIM * ((j) * NSP)], sc);

        //restore Hm(m, m + 1)
        Hm[INDEX((j - 1) * STRIDE + j)] = store;

        //restore real Hm
        Hm[INDEX((j) * STRIDE)] = 0.0;
#if defined(LOG_OUTPUT) && defined(EXACT_KRYLOV)
        //kill the error such that we will continue
        //and greatly reduce the subspace approximation error
        if (index_list[index] + p < STRIDE && fabs(Hm[INDEX((j - 1) * STRIDE + j)]) >= ATOL)
        {
            err = 10;
        }
#endif
#ifdef CONST_TIME_STEP
        if (j == STRIDE - p - 1)
            break;
#endif
    }

    return j;
}
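
/*
 * Example usage (a sketch only; `fy`, `krylov_work` and `krylov_cwork` are
 * hypothetical names, not part of this header): a Krylov-based exponential
 * integrator would seed the iteration with the current RHS and check the
 * returned subspace size before using phiHm:
 *
 *     double beta;
 *     int m = arnoldi(1.0, p, h, A, solver, fy, &beta, krylov_work, krylov_cwork);
 *     if (m + p >= STRIDE || m < 0)
 *     {
 *         //subspace exhausted or phi evaluation failed:
 *         //reduce the timestep and retry
 *         h *= 0.5;
 *     }
 */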

#endif