accelerInt
v0.1
|
CUDA LU decomposition implementation. More...
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <string.h>
#include "header.cuh"
#include "solver_props.cuh"
Go to the source code of this file.
Functions | |
__device__ int | getMax (const int n, const double *__restrict__ Arr) |
getMax finds the index of the first element having maximum absolute value. More... | |
__device__ void | scale (const int n, const double val, double *__restrict__ arrX) |
scale multiplies a vector (with increment equal to one) by a constant val. More... | |
__device__ void | swap (const int n, double *__restrict__ arrX, const int incX, double *__restrict__ arrY, const int incY) |
Interchanges two vectors arrX and arrY. More... | |
__device__ void | GERU (const int n, const double alpha, const double *__restrict__ arrX, const double *__restrict__ arrY, const int incY, double *__restrict__ A, const int lda) |
GERU performs the rank 1 operation \(A := \mathrm{alpha} \cdot \mathrm{arrX} \cdot \mathrm{arrY}^{T} + A\), where alpha is a scalar, arrX and arrY are n element vectors, and A is a (lda x n) matrix. More... | |
__device__ void | getLU (const int n, double *__restrict__ A, int *__restrict__ indPivot, int *__restrict__ info) |
Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges. More... | |
CUDA LU decomposition implementation.
Definition in file inverse.cu.
__device__ void GERU | ( | const int | n, |
const double | alpha, | ||
const double *__restrict__ | arrX, | ||
const double *__restrict__ | arrY, | ||
const int | incY, | ||
double *__restrict__ | A, | ||
const int | lda | ||
) |
GERU performs the rank 1 operation \(A := \mathrm{alpha} \cdot \mathrm{arrX} \cdot \mathrm{arrY}^{T} + A\), where alpha is a scalar, arrX and arrY are n element vectors, and A is a (lda x n) matrix.
[in] | n | The matrix/vector size |
[in] | alpha | The value to scale by |
[in] | arrX | arrX is an array of dimension at least n (unit increment). Before entry, arrX must contain the n element vector x. |
[in] | arrY | arrY is an array of dimension at least 1 + (n - 1) * incY. Before entry, the incremented array arrY must contain the n element vector y. |
[in] | incY | On entry, incY specifies the increment for the elements of arrY. incY must not be zero. |
[in,out] | A | A is an array of dimension (lda x n). Before entry, the leading n by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. |
[in] | lda | On entry, lda specifies the first dimension of A as declared in the calling (sub) program. lda must be at least max( 1, n ). |
Definition at line 109 of file inverse.cu.
__device__ void getLU | ( | const int | n, |
double *__restrict__ | A, | ||
int *__restrict__ | indPivot, | ||
int *__restrict__ | info | ||
) |
Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges.
[in] | n | The matrix size |
[in,out] | A | The matrix to factorize (n x n) with stride defined in solver_props.h |
[out] | indPivot | indPivot is an array of dimension (n). The pivot indices from getLU; for 0 <= i <= n-1, row i of the matrix was interchanged with row indPivot[i]. |
[out] | info | An information variable |
The factorization has the form \(A = P \cdot L \cdot U\), where P is a permutation matrix, L is lower triangular with unit diagonal elements, and U is upper triangular. (In the general m x n form of this factorization, L is lower trapezoidal if m > n and U is upper trapezoidal if m < n; here A is square, n x n.)
Definition at line 145 of file inverse.cu.
__device__ int getMax | ( | const int | n, |
const double *__restrict__ | Arr | ||
) |
getMax finds the index of the first element having maximum absolute value.
[in] | n | The size of Arr |
[in] | Arr | The (n x 1) vector in which to find the element of maximum absolute value |
Definition at line 23 of file inverse.cu.
__device__ void scale | ( | const int | n, |
const double | val, | ||
double *__restrict__ | arrX | ||
) |
scale multiplies a vector (with increment equal to one) by a constant val.
[in] | n | The vector size |
[in] | val | The value to scale by |
[in,out] | arrX | The vector to scale; on exit, each element has been multiplied by val |
Definition at line 50 of file inverse.cu.
__device__ void swap | ( | const int | n, |
double *__restrict__ | arrX, | ||
const int | incX, | ||
double *__restrict__ | arrY, | ||
const int | incY | ||
) |
Interchanges two vectors arrX and arrY.
[in] | n | the vector size |
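[in,out] | arrX | the first vector to swap; on exit, it holds the original contents of arrY |
[in] | incX | the increment of the arrX vector |
[in,out] | arrY | the second vector to swap; on exit, it holds the original contents of arrX |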
[in] | incY | the increment of the arrY vector |
Definition at line 70 of file inverse.cu.