accelerInt  v0.1
inverse.cu
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <math.h>
8 #include <float.h>
9 #include <string.h>
10 
11 #include "header.cuh"
12 #include "solver_props.cuh"
13 
15 
22 __device__
23 int getMax (const int n, const double * __restrict__ Arr) {
24 
25  int maxInd = 0;
26  if (n == 1)
27  return maxInd;
28 
29  double maxVal = fabs(Arr[INDEX(0)]);
30  for (int i = 1; i < n; ++i) {
31  if (fabs(Arr[INDEX(i)]) > maxVal) {
32  maxInd = i;
33  maxVal = fabs(Arr[INDEX(i)]);
34  }
35  }
36 
37  return maxInd;
38 }
39 
41 
49 __device__
50 void scale (const int n, const double val, double* __restrict__ arrX) {
51 
52  for (int i = 0; i < n; ++i) {
53  arrX[INDEX(i)] *= val;
54  }
55 
56 }
57 
59 
69 __device__
70 void swap (const int n, double* __restrict__ arrX, const int incX, double* __restrict__ arrY, const int incY) {
71 
72  int ix = 0;
73  int iy = 0;
74 
75  for (int i = 0; i < n; ++i) {
76  double temp = arrX[INDEX(ix)];
77  arrX[INDEX(ix)] = arrY[INDEX(iy)];
78  arrY[INDEX(iy)] = temp;
79  ix += incX;
80  iy += incY;
81  }
82 
83 }
84 
86 
108 __device__
109 void GERU (const int n, const double alpha, const double* __restrict__ arrX,
110  const double* __restrict__ arrY, const int incY, double* __restrict__ A, const int lda) {
111 
112  for (int j = 0; j < n; ++j) {
113  if (fabs(arrY[INDEX(j * incY)]) > 0.0) {
114 
115  double temp = alpha * arrY[INDEX(j * incY)];
116 
117  for (int i = 0; i < n; ++i) {
118  A[INDEX(i + (lda * j))] += arrX[INDEX(i)] * temp;
119  }
120 
121  }
122  }
123 
124 }
125 
127 
144 __device__
145 void getLU (const int n, double* __restrict__ A, int* __restrict__ indPivot, int* __restrict__ info) {
146 
147  for (int j = 0; j < n; ++j) {
148 
149  // find pivot and test for singularity
150 
151  int jp = j + getMax (n - j, &A[GRID_DIM * (j + (STRIDE * j))]);
152  indPivot[INDEX(j)] = jp;
153 
154  if (fabs(A[INDEX(jp + (STRIDE * j))]) > 0.0) {
155 
156  // apply interchange to columns 1:n-1
157  if (jp != j)
158  swap(n, &A[GRID_DIM * (j)], STRIDE, &A[GRID_DIM * (jp)], STRIDE);
159 
160  // compute elements j+1:m-1 of the jth column
161 
162  if (j < n - 1)
163  scale(n - j - 1, 1.0 / A[INDEX(j + (STRIDE * j))], &A[GRID_DIM * (j + 1 + (STRIDE * j))]);
164 
165  } else if (*info == 0) {
166  *info = j;
167  break;
168  }
169 
170  // update trailing submatrix
171  if (j < n - 1)
172  GERU (n - j - 1, -1.0, &A[GRID_DIM * (j + 1 + (STRIDE * j))], &A[GRID_DIM * (j + STRIDE * (j + 1))], STRIDE, &A[GRID_DIM * (j + 1 + STRIDE * (j + 1))], STRIDE);
173  }
174 }
#define GRID_DIM
The total number of threads in the Grid, provides an offset between vector entries.
Definition: gpu_macros.cuh:20
#define STRIDE
the matrix dimensions
__device__ void swap(const int n, double *__restrict__ arrX, const int incX, double *__restrict__ arrY, const int incY)
interchanges two vectors arrX and arrY.
Definition: inverse.cu:70
__device__ void GERU(const int n, const double alpha, const double *__restrict__ arrX, const double *__restrict__ arrY, const int incY, double *__restrict__ A, const int lda)
GERU performs the rank 1 operation where alpha is a scalar, arrX and arrY are n element vectors...
Definition: inverse.cu:109
simple convenience file to include the correct solver properties file
__device__ int getMax(const int n, const double *__restrict__ Arr)
getMax finds the index of the first element having maximum absolute value.
Definition: inverse.cu:23
An example header file that defines system size, memory functions and other required methods for inte...
__device__ void getLU(const int n, double *__restrict__ A, int *__restrict__ indPivot, int *__restrict__ info)
Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges...
Definition: inverse.cu:145
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
Definition: gpu_macros.cuh:24
__device__ void scale(const int n, const double val, double *__restrict__ arrX)
scale multiplies a vector (with increment equal to one) by a constant val.
Definition: inverse.cu:50