accelerInt  v0.1
phiAHessenberg.cu
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include "header.cuh"
8 #include "solver_options.cuh"
9 #include "solver_props.cuh"
10 #include "complexInverse.cuh"
11 
12 extern __device__ __constant__ cuDoubleComplex poles[N_RA];
13 extern __device__ __constant__ cuDoubleComplex res[N_RA];
14 
26 __device__
27 int phi2Ac_variable(const int m, const double* __restrict__ A, const double c,
28  double* __restrict__ phiA, const solver_memory* __restrict__ solver,
29  cuDoubleComplex* __restrict__ work) {
30 
31  cuDoubleComplex * const __restrict__ invA = solver->invA;
32  int * const __restrict__ ipiv = solver->ipiv;
33  int info = 0;
34 
35  #pragma unroll
36  for (int i = 0; i < m; ++i) {
37  #pragma unroll
38  for (int j = 0; j < m; ++j) {
39  phiA[INDEX(i + j*STRIDE)] = 0.0;
40  }
41  }
42 
43  #pragma unroll
44  for (int q = 0; q < N_RA; q += 2) {
45 
46  // compute transpose and multiply with constant
47  for (int i = 0; i < m; ++i) {
48  for (int j = 0; j < m; ++j) {
49  // A - theta * I
50  if (i == j) {
51  invA[INDEX(i + j*STRIDE)] = cuCsub(make_cuDoubleComplex(c * A[INDEX(i + j*STRIDE)], 0.0), poles[q]);
52  } else {
53  invA[INDEX(i + j*STRIDE)] = make_cuDoubleComplex(c * A[INDEX(i + j*STRIDE)], 0.0);
54  }
55  }
56  }
57 
58  // takes care of (A * c - poles(q) * I)^-1
59  getComplexInverseHessenberg (m, invA, ipiv, &info, work);
60 
61  if (info != 0)
62  return info;
63 
64  #pragma unroll
65  for (int i = 0; i < m; ++i) {
66  #pragma unroll
67  for (int j = 0; j < m; ++j) {
68  phiA[INDEX(i + j*STRIDE)] += 2.0 * cuCreal( cuCmul( cuCdiv(res[q], cuCmul(poles[q], poles[q])), invA[INDEX(i + j*STRIDE)]) );
69  }
70  }
71  }
72  return 0;
73 }
74 
86 __device__
87 int phiAc_variable(const int m, const double* __restrict__ A, const double c,
88  double* __restrict__ phiA, const solver_memory* __restrict__ solver,
89  cuDoubleComplex* __restrict__ work) {
90 
91  cuDoubleComplex * const __restrict__ invA = solver->invA;
92  int * const __restrict__ ipiv = solver->ipiv;
93  int info = 0;
94 
95  #pragma unroll
96  for (int i = 0; i < m; ++i) {
97  #pragma unroll
98  for (int j = 0; j < m; ++j) {
99  phiA[INDEX(i + j*STRIDE)] = 0.0;
100  }
101  }
102 
103  #pragma unroll
104  for (int q = 0; q < N_RA; q += 2) {
105 
106  // compute transpose and multiply with constant
107  for (int i = 0; i < m; ++i) {
108  for (int j = 0; j < m; ++j) {
109  // A - theta * I
110  if (i == j) {
111  invA[INDEX(i + j*STRIDE)] = cuCsub(make_cuDoubleComplex(c * A[INDEX(i + j*STRIDE)], 0.0), poles[q]);
112  } else {
113  invA[INDEX(i + j*STRIDE)] = make_cuDoubleComplex(c * A[INDEX(i + j*STRIDE)], 0.0);
114  }
115  }
116  }
117 
118  // takes care of (A * c - poles(q) * I)^-1
119  getComplexInverseHessenberg (m, invA, ipiv, &info, work);
120 
121  if (info != 0)
122  return info;
123 
124  #pragma unroll
125  for (int i = 0; i < m; ++i) {
126  #pragma unroll
127  for (int j = 0; j < m; ++j) {
128  phiA[INDEX(i + j*STRIDE)] += 2.0 * cuCreal( cuCmul( cuCdiv(res[q], poles[q]), invA[INDEX(i + j*STRIDE)]) );
129  }
130  }
131  }
132  return 0;
133 }
134 
147 __device__
148 int expAc_variable(const int m, const double* __restrict__ A, const double c,
149  double* __restrict__ phiA, const solver_memory* __restrict__ solver,
150  cuDoubleComplex* __restrict__ work) {
151 
152  cuDoubleComplex * const __restrict__ invA = solver->invA;
153  int * const __restrict__ ipiv = solver->ipiv;
154  int info = 0;
155 
156  #pragma unroll
157  for (int i = 0; i < m; ++i) {
158  #pragma unroll
159  for (int j = 0; j < m; ++j) {
160  phiA[INDEX(i + j*STRIDE)] = 0.0;
161  }
162  }
163 
164  #pragma unroll
165  for (int q = 0; q < N_RA; q += 2) {
166 
167  // compute transpose and multiply with constant
168  for (int i = 0; i < m; ++i) {
169  for (int j = 0; j < m; ++j) {
170  // A - theta * I
171  if (i == j) {
172  invA[INDEX(i + j*STRIDE)] = cuCsub(make_cuDoubleComplex(c * A[INDEX(i + j*STRIDE)], 0.0), poles[q]);
173  } else {
174  invA[INDEX(i + j*STRIDE)] = make_cuDoubleComplex(c * A[INDEX(i + j*STRIDE)], 0.0);
175  }
176  }
177  }
178 
179  // takes care of (A * c - poles(q) * I)^-1
180  getComplexInverseHessenberg (m, invA, ipiv, &info, work);
181 
182  if (info != 0)
183  return info;
184 
185 
186  #pragma unroll
187  for (int i = 0; i < m; ++i) {
188  #pragma unroll
189  for (int j = 0; j < m; ++j) {
190  phiA[INDEX(i + j*STRIDE)] += 2.0 * cuCreal( cuCmul( res[q], invA[INDEX(i + j*STRIDE)]) );
191  }
192  }
193  }
194  return 0;
195 }
__device__ __constant__ cuDoubleComplex res[N_RA]
__device__ __constant__ cuDoubleComplex poles[N_RA]
void getComplexInverseHessenberg(const int n, double complex *__restrict__ A, int *__restrict__ ipiv, int *__restrict__ info)
getComplexInverseHessenberg computes the inverse of an upper Hessenberg matrix A using a LU factoriza...
__device__ int phi2Ac_variable(const int m, const double *__restrict__ A, const double c, double *__restrict__ phiA, const solver_memory *__restrict__ solver, cuDoubleComplex *__restrict__ work)
Compute the 2nd order Phi (exponential) matrix function.
#define N_RA
Header definitions for CUDA LU factorization routines.
#define STRIDE
the matrix dimensions
simple convenience file to include the correct solver properties file
An example header file that defines system size, memory functions and other required methods for inte...
__device__ int expAc_variable(const int m, const double *__restrict__ A, const double c, double *__restrict__ phiA, const solver_memory *__restrict__ solver, cuDoubleComplex *__restrict__ work)
Compute the zeroth order Phi (exponential) matrix function. This is the regular matrix exponential...
A file generated by Scons that specifies various options to the solvers.
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
Definition: gpu_macros.cuh:24
__device__ int phiAc_variable(const int m, const double *__restrict__ A, const double c, double *__restrict__ phiA, const solver_memory *__restrict__ solver, cuDoubleComplex *__restrict__ work)
Compute the first order Phi (exponential) matrix function.