CUDA LU decomposition implementation. More...

#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <string.h>
#include "header.cuh"
#include "solver_props.cuh"

Include dependency graph for inverse.cu:

Go to the source code of this file.

Functions
__device__ int	getMax (const int n, const double *__restrict__ Arr)
	getMax finds the index of the first element having maximum absolute value. More...

__device__ void	scale (const int n, const double val, double *__restrict__ arrX)
	scale multiplies a vector (with increment equal to one) by a constant val. More...

__device__ void	swap (const int n, double *__restrict__ arrX, const int incX, double *__restrict__ arrY, const int incY)
	swap interchanges two vectors, arrX and arrY. More...

__device__ void	GERU (const int n, const double alpha, const double *__restrict__ arrX, const double *__restrict__ arrY, const int incY, double *__restrict__ A, const int lda)
	GERU performs the rank 1 operation \(A := alpha * arrX * arrY^{T} + A\) where alpha is a scalar, arrX and arrY are n element vectors, and A is a (lda x n) matrix. More...

__device__ void	getLU (const int n, double *__restrict__ A, int *__restrict__ indPivot, int *__restrict__ info)
	Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges. More...

Detailed Description

CUDA LU decomposition implementation.

Definition in file inverse.cu.

Function Documentation

◆ GERU()

__device__ void GERU	(	const int	n,
		const double	alpha,
		const double *__restrict__	arrX,
		const double *__restrict__	arrY,
		const int	incY,
		double *__restrict__	A,
		const int	lda
	)

GERU performs the rank 1 operation \(A := alpha * arrX * arrY^{T} + A\) where alpha is a scalar, arrX and arrY are n element vectors, and A is a (lda x n) matrix.

Parameters

[in]	n	The matrix/vector size
[in]	alpha	The value to scale by
[in]	arrX	arrX is an array of dimension at least n. Before entry, the incremented array arrX must contain the n element vector x.
[in]	arrY	arrY is an array of dimension at least 1 + (n - 1) * incY. Before entry, the incremented array arrY must contain the n element vector y.
[in]	incY	On entry, incY specifies the increment for the elements of arrY. incY must not be zero.
[in,out]	A	A is an array of dimension (lda x n). Before entry, the leading n by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix.
[in]	lda	On entry, lda specifies the first dimension of A as declared in the calling (sub) program. lda must be at least max( 1, n ).

Definition at line 109 of file inverse.cu.

◆ getLU()

__device__ void getLU	(	const int	n,
		double *__restrict__	A,
		int *__restrict__	indPivot,
		int *__restrict__	info
	)

Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges.

See also: STRIDE

Parameters

[in]	n	The matrix size
[in,out]	A	The matrix to factorize (n x n) with stride defined in solver_props.cuh

See also: STRIDE

Parameters

[out]	indPivot	indPivot is an array of dimension (n). The pivot indices from getLU; for 0<=i<=n-1, row i of the matrix was interchanged with row indPivot[i].
[out]	info	An information variable

The factorization has the form: \(A = P * L * U\) where P is a permutation matrix, L is lower triangular with unit diagonal elements, and U is upper triangular. (In the general m x n factorization L is lower trapezoidal if m > n and U is upper trapezoidal if m < n; these cases do not arise here because A is square, n x n.)

Definition at line 145 of file inverse.cu.

◆ getMax()

__device__ int getMax	(	const int	n,
		const double *__restrict__	Arr
	)

getMax finds the index of the first element having maximum absolute value.

Parameters

[in]	n	The size of Arr
[in]	Arr	The (nx1) vector to determine the maximum value of

Definition at line 23 of file inverse.cu.

◆ scale()

__device__ void scale	(	const int	n,
		const double	val,
		double *__restrict__	arrX
	)

scale multiplies a vector (with increment equal to one) by a constant val.

Parameters

[in]	n	The vector size
[in]	val	The value to scale by
[in,out]	arrX	The vector to scale

Definition at line 50 of file inverse.cu.

◆ swap()

__device__ void swap	(	const int	n,
		double *__restrict__	arrX,
		const int	incX,
		double *__restrict__	arrY,
		const int	incY
	)

swap interchanges two vectors, arrX and arrY.

Parameters

[in]	n	the vector size
[in,out]	arrX	the first vector to swap
[in]	incX	the increment of the arrX vector
[in,out]	arrY	the second vector to swap
[in]	incY	the increment of the arrY vector

Definition at line 70 of file inverse.cu.

Functions

Detailed Description

Function Documentation

◆ GERU()

◆ getLU()

◆ getMax()

◆ scale()

◆ swap()