12 #include <cuComplex.h> 22 int getComplexMax (
const int n,
const cuDoubleComplex * __restrict__ complexArr) {
28 double maxVal = cuCabs(complexArr[
INDEX(0)]);
29 for (
int i = 1; i < n; ++i) {
30 if (cuCabs(complexArr[
INDEX(i)]) > maxVal) {
32 maxVal = cuCabs(complexArr[
INDEX(i)]);
49 void scaleComplex (
const int n,
const cuDoubleComplex val, cuDoubleComplex* __restrict__ arrX) {
51 for (
int i = 0; i < n; ++i) {
102 void complexGERU (
const int n,
const cuDoubleComplex alpha,
const cuDoubleComplex* arrX,
103 const cuDoubleComplex* arrY,
const int incY, cuDoubleComplex* A,
const int lda) {
105 for (
int j = 0; j < n; ++j) {
106 if (cuCabs(arrY[
INDEX(j * incY)]) > 0.0) {
108 cuDoubleComplex temp = cuCmul(alpha, arrY[
INDEX(j * incY)]);
110 for (
int i = 0; i < n; ++i) {
111 A[
INDEX(i + (lda * j))] = cuCfma(arrX[
INDEX(i)], temp, A[
INDEX(i + (lda * j))]);
140 for (
int j = 0; j < n; ++j) {
141 if (cuCabs(x[
INDEX(j)]) > 0.0) {
142 cuDoubleComplex temp = x[
INDEX(j)];
143 for (
int i = 0; i < j; ++i) {
176 void complexGEMV (
const int m,
const int n,
const int lda,
const cuDoubleComplex alpha,
const cuDoubleComplex* A,
177 const cuDoubleComplex* arrX, cuDoubleComplex* arrY) {
184 for (
int j = 0; j < n - 1; ++j) {
186 if (cuCabs(arrX[
INDEX(j)]) > 0.0) {
187 cuDoubleComplex temp = cuCmul(alpha, arrX[
INDEX(j)]);
188 for (
int i = 0; i < m; ++i) {
190 arrY[
INDEX(i)] = cuCfma(temp, A[
INDEX(i + (lda * j))], arrY[
INDEX(i)]);
217 int* __restrict__ indPivot,
int* __restrict__ info) {
219 for (
int j = 0; j < n; ++j) {
224 indPivot[
INDEX(j)] = jp;
231 for (
int i = 0; i < n; ++i) {
244 }
else if (*info == 0) {
271 const int* __restrict__ indPivot,
272 cuDoubleComplex* __restrict__ work) {
275 for (
int j = 0; j < n; ++j) {
279 cuDoubleComplex Ajj = cuCmul(make_cuDoubleComplex(-1.0, 0.0), A[
INDEX(j + (
STRIDE * j))]);
290 for (
int j = n - 1; j >= 0; --j) {
293 for (
int i = j + 1; i < n; ++i) {
295 A[
INDEX(i + (
STRIDE * j))] = make_cuDoubleComplex(0.0, 0.0);
306 for (
int j = n - 2; j >= 0; --j) {
308 int jp = indPivot[
INDEX(j)];
311 for (
int i = 0; i < n; ++i) {
333 int* __restrict__ ipiv,
int* __restrict__ info,
334 cuDoubleComplex* __restrict__ work) {
367 void getHessenbergLU(
const int n, cuDoubleComplex* A,
int* __restrict__ indPivot,
int* __restrict__ info)
370 for (
int i = 0; i < n - 1; i ++)
375 for(
int k = 0; k < n; ++k)
384 indPivot[
INDEX(i)] = i + 1;
388 indPivot[
INDEX(i)] = i;
394 for (
int j = i + 1; j < n; j++)
407 indPivot[
INDEX(n - 1)] = n - 1;
422 int* __restrict__ ipiv,
int* __restrict__ info,
423 cuDoubleComplex* __restrict__ work)
__device__ void getHessenbergLU(const int n, cuDoubleComplex *A, int *__restrict__ indPivot, int *__restrict__ info)
Computes the LU factorization of a (n x STRIDE) Hessenberg Matrix using partial pivoting with row int...
__device__ void scaleComplex(const int n, const cuDoubleComplex val, cuDoubleComplex *__restrict__ arrX)
scaleComplex scales a vector (with increment equal to one) by a constant val.
#define GRID_DIM
The total number of threads in the Grid, provides an offset between vector entries.
#define STRIDE
the matrix dimensions
simple convenience file to include the correct solver properties file
__device__ void complexGEMV(const int m, const int n, const int lda, const cuDoubleComplex alpha, const cuDoubleComplex *A, const cuDoubleComplex *arrX, cuDoubleComplex *arrY)
Computes the matrix-vector operation where alpha is a scalar, x and y are vectors and A is an m by n...
__device__ int getComplexMax(const int n, const cuDoubleComplex *__restrict__ complexArr)
getComplexMax finds the index of the first element having maximum absolute value. ...
__device__ void getComplexInverseHessenberg(const int n, cuDoubleComplex *__restrict__ A, int *__restrict__ ipiv, int *__restrict__ info, cuDoubleComplex *__restrict__ work)
getComplexInverseHessenberg computes the inverse of an upper Hessenberg matrix A using a LU factoriza...
__device__ void getComplexLU(const int n, cuDoubleComplex *__restrict__ A, int *__restrict__ indPivot, int *__restrict__ info)
Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges...
__device__ void complexGERU(const int n, const cuDoubleComplex alpha, const cuDoubleComplex *arrX, const cuDoubleComplex *arrY, const int incY, cuDoubleComplex *A, const int lda)
complexGERU performs the rank 1 operation where alpha is a scalar, arrX and arrY are n element vecto...
__device__ void getComplexInverse(const int n, cuDoubleComplex *__restrict__ A, int *__restrict__ ipiv, int *__restrict__ info, cuDoubleComplex *__restrict__ work)
getComplexInverse computes the inverse of an a general matrix A using a LU factorization method ...
__device__ void multiplyComplexUpperMV(const int n, cuDoubleComplex *x, const int lda, const cuDoubleComplex *A)
Performs the matrix-vector operation .
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
__device__ int getComplexInverseLU(const int n, cuDoubleComplex *__restrict__ A, const int *__restrict__ indPivot, cuDoubleComplex *__restrict__ work)
getComplexInverseLU computes the inverse of a matrix using the LU factorization computed by getHessen...