/**
 * \file exponential_linear_algebra.cu
 * \brief Definitions of various linear algebra functions needed in the
 *        exponential integrators.
 *
 * Per-thread data is interleaved across GPU threads via the INDEX() macro
 * (i * GRID_DIM + T_ID, see gpu_macros.cuh); matrices are stored
 * column-major, with leading dimension STRIDE for the m x m Krylov matrices
 * and NSP (the IVP system size, see header.cuh) for the NSP x m matrices.
 */

#include "exponential_linear_algebra.cuh"
/** \brief Matrix-vector multiplication of a matrix sized MxM and a vector Mx1,
 *         i.e. Av = A * V
 */
__device__
void matvec_m_by_m (const int m, const double * const __restrict__ A,
                    const double * const __restrict__ V,
                    double * const __restrict__ Av) {
    //for each row
    for (int i = 0; i < m; ++i) {
        Av[INDEX(i)] = A[INDEX(i)] * V[INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        for (int j = 1; j < m; ++j) {
            Av[INDEX(i)] += A[INDEX(j * STRIDE + i)] * V[INDEX(j)];
        }
    }
}

/** \brief Matrix-vector plus-equals for a matrix of size MxM and vector of
 *         size Mx1, i.e. Av = (A + I) * V
 */
__device__
void matvec_m_by_m_plusequal (const int m, const double * const __restrict__ A,
                              const double * const __restrict__ V,
                              double * const __restrict__ Av)
{
    //for each row
    for (int i = 0; i < m; ++i) {
        Av[INDEX(i)] = A[INDEX(i)] * V[INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        for (int j = 1; j < m; ++j) {
            Av[INDEX(i)] += A[INDEX(j * STRIDE + i)] * V[INDEX(j)];
        }

        //add the identity contribution
        Av[INDEX(i)] += V[INDEX(i)];
    }
}

/** \brief Matrix-vector multiplication of a matrix sized NSPxM and a vector of
 *         size Mx1, scaled by a specified factor, i.e. Av = scale * A * V
 */
__device__
void matvec_n_by_m_scale (const int m, const double scale,
                          const double * const __restrict__ A,
                          const double * const __restrict__ V,
                          double * const __restrict__ Av) {
    //for each row
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        Av[INDEX(i)] = A[INDEX(i)] * V[INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        for (int j = 1; j < m; ++j) {
            Av[INDEX(i)] += A[INDEX(j * NSP + i)] * V[INDEX(j)];
        }

        Av[INDEX(i)] *= scale;
    }
}

/** \brief Matrix-vector multiplication of a matrix sized NSPxM with three
 *         vectors of size Mx1, each product scaled by its own factor; the
 *         third result additionally accumulates V[3] + V[4], i.e.
 *         Av[0] = scale[0] * A * V[0]
 *         Av[1] = scale[1] * A * V[1]
 *         Av[2] = scale[2] * A * V[2] + V[3] + V[4]
 */
__device__
void matvec_n_by_m_scale_special (const int m, const double * __restrict__ scale,
                                  const double * __restrict__ A,
                                  double * const __restrict__ * V,
                                  double * __restrict__ * Av) {
    //for each row
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        Av[0][INDEX(i)] = A[INDEX(i)] * V[0][INDEX(0)];
        Av[1][INDEX(i)] = A[INDEX(i)] * V[1][INDEX(0)];
        Av[2][INDEX(i)] = A[INDEX(i)] * V[2][INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        for (int j = 1; j < m; ++j) {
            Av[0][INDEX(i)] += A[INDEX(j * NSP + i)] * V[0][INDEX(j)];
            Av[1][INDEX(i)] += A[INDEX(j * NSP + i)] * V[1][INDEX(j)];
            Av[2][INDEX(i)] += A[INDEX(j * NSP + i)] * V[2][INDEX(j)];
        }

        Av[0][INDEX(i)] *= scale[0];
        Av[1][INDEX(i)] *= scale[1];
        Av[2][INDEX(i)] = scale[2] * Av[2][INDEX(i)] + V[3][INDEX(i)] + V[4][INDEX(i)];
    }
}

/** \brief As matvec_n_by_m_scale_special, but for only two vectors and without
 *         the extra additions, i.e.
 *         Av[0] = scale[0] * A * V[0]
 *         Av[1] = scale[1] * A * V[1]
 */
__device__
void matvec_n_by_m_scale_special2 (const int m, const double* __restrict__ scale,
                                   const double* __restrict__ A,
                                   double* const __restrict__ * V,
                                   double* __restrict__ * Av) {
    //for each row
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        Av[0][INDEX(i)] = A[INDEX(i)] * V[0][INDEX(0)];
        Av[1][INDEX(i)] = A[INDEX(i)] * V[1][INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        for (int j = 1; j < m; ++j) {
            Av[0][INDEX(i)] += A[INDEX(j * NSP + i)] * V[0][INDEX(j)];
            Av[1][INDEX(i)] += A[INDEX(j * NSP + i)] * V[1][INDEX(j)];
        }

        Av[0][INDEX(i)] *= scale[0];
        Av[1][INDEX(i)] *= scale[1];
    }
}

/** \brief Matrix-vector multiplication of a matrix sized NSPxM and a vector of
 *         size Mx1, scaled by a specified factor and added to another vector,
 *         i.e. Av = scale * A * V + add
 */
__device__
void matvec_n_by_m_scale_add (const int m, const double scale,
                              const double* __restrict__ A,
                              const double* __restrict__ V,
                              double* __restrict__ Av,
                              const double* __restrict__ add) {
    //for each row
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        Av[INDEX(i)] = A[INDEX(i)] * V[INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        for (int j = 1; j < m; ++j) {
            Av[INDEX(i)] += A[INDEX(j * NSP + i)] * V[INDEX(j)];
        }

        Av[INDEX(i)] = Av[INDEX(i)] * scale + add[INDEX(i)];
    }
}
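
/* Usage sketch (not part of the original file): in the exponential
 * integrators a stage solution is typically assembled from the Krylov
 * approximation as y1 = beta * Vm * phiHm(:,col) + y. The names beta, Vm,
 * phiHm, col, y and y1 are assumptions for illustration, with phiHm stored
 * column-major with leading dimension STRIDE:
 *
 *     matvec_n_by_m_scale_add(m, beta, Vm, &phiHm[GRID_DIM * col * STRIDE], y1, y);
 */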

/** \brief Matrix-vector multiplication of a matrix sized NSPxM and a vector of
 *         size Mx1, scaled by a specified factor, with one vector added twice
 *         and another subtracted, i.e. Av = scale * A * V + 2 * add - sub
 */
__device__
void matvec_n_by_m_scale_add_subtract (const int m, const double scale,
                                       const double* __restrict__ A,
                                       const double* V,
                                       double* __restrict__ Av,
                                       const double* __restrict__ add,
                                       const double* __restrict__ sub) {
    //for each row
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        Av[INDEX(i)] = A[INDEX(i)] * V[INDEX(0)];

        //go across a row of A, multiplying by a column of phiHm
        #pragma unroll
        for (int j = 1; j < m; ++j) {
            Av[INDEX(i)] += A[INDEX(j * NSP + i)] * V[INDEX(j)];
        }

        Av[INDEX(i)] = Av[INDEX(i)] * scale + 2.0 * add[INDEX(i)] - sub[INDEX(i)];
    }
}

/** \brief Get the scaling vector for the weighted norm used in error control,
 *         i.e. sc[i] = 1 / (ATOL + max(|y0[i]|, |y1[i]|) * RTOL)
 */
__device__
void scale (const double* __restrict__ y0, const double* __restrict__ y1,
            double* __restrict__ sc) {
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        sc[INDEX(i)] = 1.0 / (ATOL + fmax(fabs(y0[INDEX(i)]), fabs(y1[INDEX(i)])) * RTOL);
    }
}

/** \brief Get the scaling vector for the weighted norm for the initial
 *         timestep (used in the Krylov process), i.e.
 *         sc[i] = 1 / (ATOL + |y0[i]| * RTOL)
 */
__device__
void scale_init (const double* __restrict__ y0, double* __restrict__ sc) {
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        sc[INDEX(i)] = 1.0 / (ATOL + fabs(y0[INDEX(i)]) * RTOL);
    }
}

/** \brief Perform the weighted root-mean-square norm,
 *         i.e. sqrt( (1/NSP) * sum_i (nums[i] * sc[i])^2 )
 */
__device__
double sc_norm (const double* __restrict__ nums, const double* __restrict__ sc) {
    double norm = 0.0;

    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        norm += nums[INDEX(i)] * nums[INDEX(i)] * (sc[INDEX(i)] * sc[INDEX(i)]);
    }

    return sqrt(norm / ((double)NSP));
}
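
/* A minimal usage sketch (not part of the original file): step-error control
 * in the exponential integrators combines scale() and sc_norm() to form a
 * weighted RMS error. The names y, y_new, err and sc below are assumptions
 * for illustration; each is a per-thread vector of length NSP stored with
 * the INDEX() layout.
 */
__device__
double error_norm_sketch (const double* __restrict__ y,
                          const double* __restrict__ y_new,
                          const double* __restrict__ err,
                          double* __restrict__ sc) {
    //sc[i] = 1 / (ATOL + max(|y[i]|, |y_new[i]|) * RTOL)
    scale(y, y_new, sc);
    //a step would typically be accepted when this norm is <= 1.0
    return sc_norm(err, sc);
}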

/** \brief Computes and returns the two-norm of a vector of size NSPx1 */
__device__
double two_norm (const double* __restrict__ v)
{
    double norm = 0.0;
    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        norm += v[INDEX(i)] * v[INDEX(i)];
    }
    return sqrt(norm);
}

/** \brief Normalize the input vector using a 2-norm, i.e. v_out = v / ||v||,
 *         and return ||v||
 */
__device__
double normalize (const double* __restrict__ v, double* __restrict__ v_out) {

    double norm = two_norm(v);

    //a zero norm is unlikely, but if it occurs we still need to copy v
    //into v_out, so guard against division by zero
    if (norm == 0.0)
        norm = 1.0;

    double m_norm = 1.0 / norm;

    #pragma unroll
    for (int i = 0; i < NSP; ++i) {
        v_out[INDEX(i)] = v[INDEX(i)] * m_norm;
    }
    return norm;
}
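
/* Usage sketch (not part of the original file): the Krylov process starts by
 * normalizing the right-hand side into the first basis vector. The names fy
 * and Vm are assumptions for illustration; Vm points at column 0 of the
 * Krylov basis.
 */
__device__
double krylov_init_sketch (const double* __restrict__ fy, double* __restrict__ Vm) {
    //Vm (column 0) = fy / ||fy||; the returned beta = ||fy|| is reused when
    //assembling the final solution
    return normalize(fy, Vm);
}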

/** \brief Performs the dot product of the w (NSPx1) vector with the given
 *         subspace vector Vm (NSPx1), i.e. returns w . Vm
 */
__device__
double dotproduct (const double* __restrict__ w, const double* __restrict__ Vm)
{
    double sum = 0;
    #pragma unroll
    for (int i = 0; i < NSP; i++)
    {
        sum += w[INDEX(i)] * Vm[INDEX(i)];
    }
    return sum;
}

/** \brief Subtracts Vm scaled by s from w, i.e. w -= s * Vm */
__device__
void scale_subtract (const double s, const double* __restrict__ Vm,
                     double* __restrict__ w)
{
    #pragma unroll
    for (int i = 0; i < NSP; i++)
    {
        w[INDEX(i)] -= s * Vm[INDEX(i)];
    }
}

/** \brief Sets Vm to s * w, i.e. Vm = s * w */
__device__
void scale_mult (const double s, const double* __restrict__ w,
                 double* __restrict__ Vm)
{
    #pragma unroll
    for (int i = 0; i < NSP; i++)
    {
        Vm[INDEX(i)] = w[INDEX(i)] * s;
    }
}
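
/* A minimal sketch (not part of the original file) of how dotproduct(),
 * scale_subtract(), two_norm() and scale_mult() combine in one modified
 * Gram-Schmidt step of the Arnoldi/Krylov process used by the exponential
 * integrators. The layout assumptions here (Krylov basis Vm stored as
 * NSP-long columns offset by GRID_DIM * NSP, Hessenberg matrix Hm with
 * leading dimension STRIDE) are for illustration only.
 */
__device__
void gram_schmidt_step_sketch (const int j, double* __restrict__ w,
                               double* __restrict__ Vm,
                               double* __restrict__ Hm) {
    //orthogonalize w against the first j + 1 basis vectors
    for (int i = 0; i <= j; ++i) {
        const double hij = dotproduct(w, &Vm[GRID_DIM * i * NSP]);
        Hm[INDEX(j * STRIDE + i)] = hij;
        scale_subtract(hij, &Vm[GRID_DIM * i * NSP], w);
    }
    //the norm of the remainder becomes the subdiagonal entry of Hm...
    const double norm_w = two_norm(w);
    Hm[INDEX(j * STRIDE + j + 1)] = norm_w;
    //...and the normalized remainder becomes the next basis vector
    //(assumes norm_w > 0; see normalize() for the zero-norm guard)
    scale_mult(1.0 / norm_w, w, &Vm[GRID_DIM * (j + 1) * NSP]);
}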