accelerInt/inverse_8cu_source.html

 #include <stdlib.h>
 #include <math.h>
 #include <float.h>
 #include <string.h>

 #include "header.cuh"
 #include "solver_props.cuh"


 /**
  * Returns the position of the first entry of Arr whose absolute value is
  * largest.
  *
  * Entries are read through INDEX(), i.e. they are interleaved per thread.
  *
  * \param[in]  n    number of entries to scan
  * \param[in]  Arr  vector to scan
  * \return     zero-based position of the first entry with maximal magnitude
  */
 __device__
 int getMax (const int n, const double * __restrict__ Arr) {

     // a one-element vector has its maximum at position 0; nothing is read
     if (n == 1)
         return 0;

     int best = 0;
     double bestMag = fabs(Arr[INDEX(0)]);

     int k = 1;
     while (k < n) {
         const double mag = fabs(Arr[INDEX(k)]);
         // strict comparison keeps the earliest position on ties
         if (mag > bestMag) {
             bestMag = mag;
             best = k;
         }
         ++k;
     }

     return best;
 }


 /**
  * Multiplies the first n entries of arrX by val, in place.
  *
  * Entries are addressed through INDEX() with unit increment.
  *
  * \param[in]      n     number of entries to scale
  * \param[in]      val   multiplier applied to every entry
  * \param[in,out]  arrX  vector that is scaled in place
  */
 __device__
 void scale (const int n, const double val, double* __restrict__ arrX) {

     int k = 0;
     while (k < n) {
         double* const entry = &arrX[INDEX(k)];
         *entry = *entry * val;
         ++k;
     }

 }


 /**
  * Exchanges n entries between arrX and arrY.
  *
  * The k-th exchanged pair is arrX[INDEX(k * incX)] and arrY[INDEX(k * incY)].
  *
  * \param[in]      n     number of entries to exchange
  * \param[in,out]  arrX  first vector
  * \param[in]      incX  step between consecutive entries of arrX
  * \param[in,out]  arrY  second vector
  * \param[in]      incY  step between consecutive entries of arrY
  */
 __device__
 void swap (const int n, double* __restrict__ arrX, const int incX, double* __restrict__ arrY, const int incY) {

     // posX / posY advance by their own increment on every exchange
     for (int k = 0, posX = 0, posY = 0; k < n; ++k, posX += incX, posY += incY) {
         const double fromX = arrX[INDEX(posX)];
         const double fromY = arrY[INDEX(posY)];
         arrX[INDEX(posX)] = fromY;
         arrY[INDEX(posY)] = fromX;
     }

 }


 /**
  * Rank-1 update of an n-by-n block: A := alpha * arrX * arrY^T + A.
  *
  * Element (i, j) of the block is stored at A[INDEX(i + lda * j)]; entry j of
  * arrY is read from arrY[INDEX(j * incY)].
  *
  * \param[in]      n      number of rows and columns that are updated
  * \param[in]      alpha  scalar multiplier
  * \param[in]      arrX   vector with n entries, unit increment
  * \param[in]      arrY   vector with n entries, increment incY
  * \param[in]      incY   step between consecutive entries of arrY
  * \param[in,out]  A      block that is updated in place
  * \param[in]      lda    offset between consecutive columns of A
  */
 __device__
 void GERU (const int n, const double alpha, const double* __restrict__ arrX,
                 const double* __restrict__ arrY, const int incY, double* __restrict__ A, const int lda) {

     for (int col = 0; col < n; ++col) {

         const double yEntry = arrY[INDEX(col * incY)];

         // an entry of arrY that is not strictly non-zero in magnitude
         // leaves this column untouched
         if (!(fabs(yEntry) > 0.0))
             continue;

         const double factor = alpha * yEntry;

         for (int row = 0; row < n; ++row) {
             A[INDEX(row + (lda * col))] += arrX[INDEX(row)] * factor;
         }
     }

 }


 /**
  * In-place LU factorization of an n-by-n matrix with partial pivoting
  * (row interchanges).
  *
  * Element (i, j) of the matrix is stored at A[INDEX(i + STRIDE * j)], so
  * STRIDE acts as the offset between consecutive columns.
  *
  * \param[in]      n         order of the matrix
  * \param[in,out]  A         matrix on entry; overwritten column by column
  *                           with the multipliers (below the diagonal) and the
  *                           updated trailing entries
  * \param[out]     indPivot  indPivot[INDEX(j)] receives the zero-based row
  *                           that was interchanged with row j
  * \param[in,out]  info      must be 0 on entry. Left at 0 when no zero pivot
  *                           is met; otherwise set to (j + 1), where j is the
  *                           zero-based column whose pivot is exactly zero,
  *                           and the factorization stops at that column.
  *                           If *info is non-zero on entry, a zero pivot is
  *                           neither recorded nor does it stop the loop.
  */
 __device__
 void getLU (const int n, double* __restrict__ A, int* __restrict__ indPivot, int* __restrict__ info) {

     for (int j = 0; j < n; ++j) {

         // find pivot and test for singularity:
         // search rows j..n-1 of column j for the largest magnitude
         int jp = j + getMax (n - j, &A[GRID_DIM * (j + (STRIDE * j))]);
         indPivot[INDEX(j)] = jp;

         if (fabs(A[INDEX(jp + (STRIDE * j))]) > 0.0) {

             // interchange rows j and jp across all n columns
             if (jp != j)
                 swap(n, &A[GRID_DIM * (j)], STRIDE, &A[GRID_DIM * (jp)], STRIDE);

             // compute elements j+1:n-1 of the jth column
             // (divide the sub-diagonal entries by the pivot)
             if (j < n - 1)
                 scale(n - j - 1, 1.0 / A[INDEX(j + (STRIDE * j))], &A[GRID_DIM * (j + 1 + (STRIDE * j))]);

         } else if (*info == 0) {
             // Report the singular column one-based so that a zero pivot in
             // the very first column (j == 0) is not mistaken for success.
             *info = j + 1;
             break;
         }

         // update trailing submatrix
         if (j < n - 1)
             GERU (n - j - 1, -1.0, &A[GRID_DIM * (j + 1 + (STRIDE * j))], &A[GRID_DIM * (j + STRIDE * (j + 1))], STRIDE, &A[GRID_DIM * (j + 1 + STRIDE * (j + 1))], STRIDE);
     }
 }
GRID_DIM
#define GRID_DIM
The total number of threads in the grid; it provides the offset between consecutive vector entries.
Definition: gpu_macros.cuh:20

STRIDE
#define STRIDE
the matrix dimensions
Definition: radau2a_props.cuh:20

swap
__device__ void swap(const int n, double *__restrict__ arrX, const int incX, double *__restrict__ arrY, const int incY)
interchanges two vectors arrX and arrY.
Definition: inverse.cu:70

GERU
__device__ void GERU(const int n, const double alpha, const double *__restrict__ arrX, const double *__restrict__ arrY, const int incY, double *__restrict__ A, const int lda)
GERU performs the rank 1 operation $A := \alpha \, \mathrm{arrX} \, \mathrm{arrY}^{T} + A$, where alpha is a scalar, arrX and arrY are n element vectors...
Definition: inverse.cu:109

solver_props.cuh
simple convenience file to include the correct solver properties file

getMax
__device__ int getMax(const int n, const double *__restrict__ Arr)
getMax finds the index of the first element having maximum absolute value.
Definition: inverse.cu:23

header.cuh
An example header file that defines system size, memory functions and other required methods for inte...

getLU
__device__ void getLU(const int n, double *__restrict__ A, int *__restrict__ indPivot, int *__restrict__ info)
Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges...
Definition: inverse.cu:145

INDEX
#define INDEX(i)
Convenience macro to get the value of a vector at index i, calculated as i * GRID_DIM + T_ID...
Definition: gpu_macros.cuh:24

scale
__device__ void scale(const int n, const double val, double *__restrict__ arrX)
scale multiplies a vector (with increment equal to one) by a constant val.
Definition: inverse.cu:50