accelerInt v0.1
complexInverse.cu
#include "header.cuh"
#include "solver_props.cuh"
#include <cuComplex.h>

/** getComplexMax finds the index of the first element of a complex vector
 *  having maximum absolute value.
 */
__device__
int getComplexMax (const int n, const cuDoubleComplex * __restrict__ complexArr) {

    int maxInd = 0;
    if (n == 1)
        return maxInd;

    double maxVal = cuCabs(complexArr[INDEX(0)]);
    for (int i = 1; i < n; ++i) {
        if (cuCabs(complexArr[INDEX(i)]) > maxVal) {
            maxInd = i;
            maxVal = cuCabs(complexArr[INDEX(i)]);
        }
    }

    return maxInd;
}
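
/*
 * Note on indexing: every routine in this file operates on one matrix (or
 * vector) per GPU thread, stored interleaved across the grid.  Element i of
 * a per-thread vector lives at i * GRID_DIM + T_ID, which is exactly what
 * INDEX(i) expands to (see gpu_macros.cuh).  A minimal sketch of the layout,
 * assuming the T_ID thread-index macro from gpu_macros.cuh:
 *
 *     cuDoubleComplex first  = arr[INDEX(0)];  // == arr[T_ID]
 *     cuDoubleComplex second = arr[INDEX(1)];  // == arr[GRID_DIM + T_ID]
 *
 * Consecutive threads therefore touch consecutive addresses for the same
 * logical element, keeping global-memory accesses coalesced.
 */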

/** scaleComplex scales a complex vector (with increment equal to one) by a
 *  constant val.
 */
__device__
void scaleComplex (const int n, const cuDoubleComplex val, cuDoubleComplex* __restrict__ arrX) {

    for (int i = 0; i < n; ++i) {
        arrX[INDEX(i)] = cuCmul(arrX[INDEX(i)], val);
    }

}

/* swapComplex would interchange two complex vectors; it is currently unused
 * because getComplexLU performs the row interchange inline (see below).
__device__
void swapComplex (const int n, cuDoubleComplex* __restrict__ arrX, const int incX,
                  cuDoubleComplex* __restrict__ arrY, const int incY) {

    int ix = 0;
    int iy = 0;

    for (int i = 0; i < n; ++i) {
        cuDoubleComplex temp = arrX[INDEX(ix)];
        arrX[INDEX(ix)] = arrY[INDEX(iy)];
        arrY[INDEX(iy)] = temp;
        ix += incX;
        iy += incY;
    }

}*/

/** complexGERU performs the (unconjugated) rank-1 update
 *      A := alpha * x * y**T + A
 *  where alpha is a scalar, arrX and arrY are n-element vectors, and A is an
 *  n by n matrix stored column-major with leading dimension lda.
 */
__device__
void complexGERU (const int n, const cuDoubleComplex alpha, const cuDoubleComplex* arrX,
                  const cuDoubleComplex* arrY, const int incY, cuDoubleComplex* A, const int lda) {

    for (int j = 0; j < n; ++j) {
        // skip columns whose y[j] is exactly zero
        if (cuCabs(arrY[INDEX(j * incY)]) > 0.0) {

            cuDoubleComplex temp = cuCmul(alpha, arrY[INDEX(j * incY)]);

            for (int i = 0; i < n; ++i) {
                A[INDEX(i + (lda * j))] = cuCfma(arrX[INDEX(i)], temp, A[INDEX(i + (lda * j))]);
            }

        }
    }

}
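
/*
 * For example, with n == 2 the update above adds the outer product
 * alpha * x * y**T to A column by column:
 *
 *     A(0,j) += alpha * y[j] * x[0]
 *     A(1,j) += alpha * y[j] * x[1]      for j = 0, 1
 *
 * This matches the BLAS ZGERU operation with an x-increment of one;
 * getComplexLU uses it with alpha = -1 to subtract the outer product of the
 * current pivot column and pivot row from the trailing submatrix.
 */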

/** multiplyComplexUpperMV performs the matrix-vector operation x := U * x,
 *  where x is an n-element complex vector and U is the upper-triangular part
 *  of the matrix A (leading dimension lda).
 */
__device__
void multiplyComplexUpperMV (const int n, cuDoubleComplex* x, const int lda, const cuDoubleComplex* A) {

    for (int j = 0; j < n; ++j) {
        if (cuCabs(x[INDEX(j)]) > 0.0) {
            cuDoubleComplex temp = x[INDEX(j)];
            for (int i = 0; i < j; ++i) {
                //x[i] += temp * A[i + (lda * j)];
                x[INDEX(i)] = cuCfma(temp, A[INDEX(i + (lda * j))], x[INDEX(i)]);
            }
            //x[j] *= A[j + (lda * j)];
            x[INDEX(j)] = cuCmul(x[INDEX(j)], A[INDEX(j + (lda * j))]);
        }
    }

}
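
/*
 * Only elements on or above the diagonal of A are ever read here, so the
 * strictly lower triangle (which holds L after factorization) is ignored.
 * This is the triangular update getComplexInverseLU relies on to build
 * inv(U) one column at a time: column j of inv(U) depends only on columns
 * 0:j-1, which have already been inverted by the time the sweep reaches it.
 */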

/** complexGEMV computes the matrix-vector operation y := alpha * A * x + y,
 *  where alpha is a scalar, x and y are vectors, and A is an m by n matrix
 *  stored column-major with leading dimension lda.
 */
__device__
void complexGEMV (const int m, const int n, const int lda, const cuDoubleComplex alpha, const cuDoubleComplex* A,
                  const cuDoubleComplex* arrX, cuDoubleComplex* arrY) {

    // first: y = beta*y
    // beta = 1, so nothing to do

    // second: y = alpha*A*x + y
    for (int j = 0; j < n - 1; ++j) {

        if (cuCabs(arrX[INDEX(j)]) > 0.0) {
            cuDoubleComplex temp = cuCmul(alpha, arrX[INDEX(j)]);
            for (int i = 0; i < m; ++i) {
                //arrY[i] += temp * A[i + (lda * j)];
                arrY[INDEX(i)] = cuCfma(temp, A[INDEX(i + (lda * j))], arrY[INDEX(i)]);
            }
        }
    }

}
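
/*
 * The loop bound of n - 1 columns is deliberate: the only caller,
 * getComplexInverseLU below, passes n - j as the column count for a trailing
 * block that actually has n - j - 1 columns, so stopping at n - 1 covers
 * exactly the columns that exist.
 */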

/** getComplexLU computes the in-place LU factorization of an (n x n) complex
 *  matrix using partial pivoting with row interchanges.  On exit the strictly
 *  lower triangle of A holds the multipliers of the unit lower-triangular
 *  factor L and the upper triangle holds U; indPivot[j] records the row
 *  interchanged with row j.  Note that *info is only written on failure, so
 *  the caller must zero it beforehand.
 */
__device__
void getComplexLU (const int n, cuDoubleComplex* __restrict__ A,
                   int* __restrict__ indPivot, int* __restrict__ info) {

    for (int j = 0; j < n; ++j) {

        // find pivot and test for singularity
        int jp = j + getComplexMax (n - j, &A[GRID_DIM * (j + (STRIDE * j))]);
        indPivot[INDEX(j)] = jp;

        if (cuCabs(A[INDEX(jp + (STRIDE * j))]) > 0.0) {

            // apply the interchange across all columns (swap rows j and jp)
            if (jp != j)
            {
                for (int i = 0; i < n; ++i) {
                    cuDoubleComplex temp = A[INDEX(STRIDE * i + j)];
                    A[INDEX(STRIDE * i + j)] = A[INDEX(STRIDE * i + jp)];
                    A[INDEX(STRIDE * i + jp)] = temp;
                }
                //swapComplex (n, &A[GRID_DIM * (j)], STRIDE, &A[GRID_DIM * (jp)], STRIDE);
            }

            // compute elements j+1:n-1 of the jth column (the multipliers)
            if (j < STRIDE - 1)
                scaleComplex (n - j - 1, cuCdiv(make_cuDoubleComplex(1.0, 0.0), A[INDEX(j + (STRIDE * j))]), &A[GRID_DIM * (j + 1 + (STRIDE * j))]);

        } else if (*info == 0) {
            *info = j;
            break;
        }

        // update the trailing submatrix: A22 -= (pivot column of L) * (pivot row of U)
        if (j < n - 1)
            complexGERU (n - j - 1, make_cuDoubleComplex(-1.0, 0.0), &A[GRID_DIM * (j + 1 + (STRIDE * j))], &A[GRID_DIM * (j + STRIDE * (j + 1))], STRIDE, &A[GRID_DIM * (j + 1 + STRIDE * (j + 1))], STRIDE);

    }
}
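
/*
 * Note the two offset forms above: A[INDEX(k)] reads this thread's element k
 * directly, while &A[GRID_DIM * k] passes a sub-vector starting at logical
 * element k.  The callee then applies INDEX (which adds the thread offset
 * T_ID) on top of that base pointer, so the GRID_DIM * k form must not
 * include T_ID itself.
 */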

/** getComplexInverseLU computes the inverse of a matrix in place from the LU
 *  factorization produced by getComplexLU or getHessenbergLU.  Returns 0 on
 *  success, or the index of the first zero diagonal element of U.
 */
__device__
int getComplexInverseLU (const int n, cuDoubleComplex* __restrict__ A,
                         const int* __restrict__ indPivot,
                         cuDoubleComplex* __restrict__ work) {

    // form inv(U)
    for (int j = 0; j < n; ++j) {
        if (cuCabs(A[INDEX(j + (STRIDE * j))]) == 0)
            return j;
        A[INDEX(j + (STRIDE * j))] = cuCdiv(make_cuDoubleComplex(1.0, 0.0), A[INDEX(j + (STRIDE * j))]);
        cuDoubleComplex Ajj = cuCmul(make_cuDoubleComplex(-1.0, 0.0), A[INDEX(j + (STRIDE * j))]);

        // compute elements 0:j-1 of the jth column
        multiplyComplexUpperMV (j, &A[GRID_DIM * (STRIDE * j)], STRIDE, A);

        // scale
        scaleComplex (j, Ajj, &A[GRID_DIM * (STRIDE * j)]);
    }

    // solve the equation inv(A)*L = inv(U) for inv(A)
    for (int j = n - 1; j >= 0; --j) {

        // copy the current column of L to work and replace it with zeros
        for (int i = j + 1; i < n; ++i) {
            work[INDEX(i)] = A[INDEX(i + (STRIDE * j))];
            A[INDEX(i + (STRIDE * j))] = make_cuDoubleComplex(0.0, 0.0);
        }

        // compute the current column of inv(A)
        if (j < n - 1)
            complexGEMV (n, n - j, STRIDE, make_cuDoubleComplex(-1.0, 0.0), &A[GRID_DIM * (STRIDE * (j + 1))], &work[GRID_DIM * (j + 1)], &A[GRID_DIM * (STRIDE * j)]);

    }

    // apply column interchanges to undo the pivoting
    for (int j = n - 2; j >= 0; --j) {

        int jp = indPivot[INDEX(j)];
        if (jp != j)
        {
            for (int i = 0; i < n; ++i) {
                cuDoubleComplex temp = A[INDEX(STRIDE * j + i)];
                A[INDEX(STRIDE * j + i)] = A[INDEX(STRIDE * jp + i)];
                A[INDEX(STRIDE * jp + i)] = temp;
            }
        }
    }
    return 0;
}
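
/*
 * The scratch vector work must provide at least n per-thread elements, since
 * elements j+1:n-1 of each column of L are staged there.  The three phases
 * above mirror LAPACK's unblocked ZGETRI: invert U in place, back-solve
 * inv(A) * L = inv(U), then undo the row interchanges of the factorization
 * as column interchanges of the inverse.
 */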

/** getComplexInverse computes the inverse of a general complex matrix A in
 *  place using an LU factorization method.  On exit *info is 0 on success,
 *  or the index of the first zero pivot.
 */
__device__
void getComplexInverse (const int n, cuDoubleComplex* __restrict__ A,
                        int* __restrict__ ipiv, int* __restrict__ info,
                        cuDoubleComplex* __restrict__ work) {

    // first get the LU factorization
    getComplexLU (n, A, ipiv, info);

    // check for successful exit
    if (*info != 0) {
        return;
    }

    // now get the inverse
    *info = getComplexInverseLU (n, A, ipiv, work);
}
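
/*
 * A minimal end-to-end usage sketch, assuming the T_ID thread-index macro
 * from gpu_macros.cuh.  The kernel name and signature are illustrative only,
 * not part of accelerInt: each thread inverts its own interleaved n x n
 * matrix (n <= STRIDE), with per-thread pivot and work storage laid out the
 * same way.
 */
__global__ void exampleInverseKernel (const int n, cuDoubleComplex* A,
                                      int* ipiv, cuDoubleComplex* work,
                                      int* info)
{
    int my_info = 0;                      // getComplexLU assumes *info starts at zero
    getComplexInverse (n, A, ipiv, &my_info, work);
    info[T_ID] = my_info;                 // 0 on success, else the first zero pivot
}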

/** getHessenbergLU computes the in-place LU factorization of an
 *  (n x STRIDE) upper Hessenberg matrix using partial pivoting with row
 *  interchanges.  Because only the subdiagonal entry can compete with the
 *  diagonal for the pivot, each step compares just rows i and i + 1; the
 *  multiplier tau is stored in the vacated subdiagonal position.
 */
__device__
void getHessenbergLU (const int n, cuDoubleComplex* A, int* __restrict__ indPivot, int* __restrict__ info)
{
    int last_pivot = 0;
    for (int i = 0; i < n - 1; i++)
    {
        if (cuCabs(A[INDEX(i * STRIDE + i)]) < cuCabs(A[INDEX(i * STRIDE + i + 1)]))
        {
            // swap rows i and i + 1
            for (int k = 0; k < n; ++k)
            {
                if (k >= last_pivot)
                {
                    cuDoubleComplex temp = A[INDEX(k * STRIDE + i)];
                    A[INDEX(k * STRIDE + i)] = A[INDEX(k * STRIDE + i + 1)];
                    A[INDEX(k * STRIDE + i + 1)] = temp;
                }
            }
            indPivot[INDEX(i)] = i + 1;
        }
        else
        {
            indPivot[INDEX(i)] = i;
            last_pivot = i;
        }
        if (cuCabs(A[INDEX(i * STRIDE + i)]) > 0.0)
        {
            cuDoubleComplex tau = cuCdiv(A[INDEX(i * STRIDE + i + 1)], A[INDEX(i * STRIDE + i)]);
            for (int j = i + 1; j < n; j++)
            {
                A[INDEX(j * STRIDE + i + 1)] = cuCsub(A[INDEX(j * STRIDE + i + 1)], cuCmul(tau, A[INDEX(j * STRIDE + i)]));
            }
            A[INDEX(i * STRIDE + i + 1)] = tau;
        }
        else
        {
            *info = i;
            return;
        }
    }
    // the last index is not pivoted
    indPivot[INDEX(n - 1)] = n - 1;
}
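
/*
 * Compared with the general-purpose getComplexLU above, only rows i and
 * i + 1 compete for the pivot at each step and only a single row update is
 * needed, so the Hessenberg factorization costs O(n^2) operations rather
 * than O(n^3).  The tau values stored in the subdiagonal play the same role
 * as the multiplier columns of L in the general factorization.
 */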

/** getComplexInverseHessenberg computes the inverse of an upper Hessenberg
 *  matrix A in place using an LU factorization method.  On exit *info is 0
 *  on success, or the index of the first zero pivot.
 */
__device__
void getComplexInverseHessenberg (const int n, cuDoubleComplex* __restrict__ A,
                                  int* __restrict__ ipiv, int* __restrict__ info,
                                  cuDoubleComplex* __restrict__ work)
{
    // first get the LU factorization
    getHessenbergLU (n, A, ipiv, info);

    if (*info != 0)
        return;

    // now get the inverse
    *info = getComplexInverseLU (n, A, ipiv, work);
}
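
/*
 * getComplexInverseHessenberg is a drop-in alternative to getComplexInverse
 * when the input is known to be upper Hessenberg (as arises, for example,
 * in Krylov-subspace methods): the cheaper factorization is used, while the
 * inversion step is shared via getComplexInverseLU.
 */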