Implementation of LU factorization of complex (variable-sized) matricies for CUDA. More...

#include "header.cuh"
#include "solver_props.cuh"
#include <cuComplex.h>

Include dependency graph for complexInverse.cu:

Functions
__device__ int	getComplexMax (const int n, const cuDoubleComplex *__restrict__ complexArr)
	getComplexMax finds the index of the first element having maximum absolute value. More...

__device__ void	scaleComplex (const int n, const cuDoubleComplex val, cuDoubleComplex *__restrict__ arrX)
	scaleComplex scales a vector (with increment equal to one) by a constant val. More...

__device__ void	complexGERU (const int n, const cuDoubleComplex alpha, const cuDoubleComplex arrX, const cuDoubleComplex arrY, const int incY, cuDoubleComplex *A, const int lda)
	complexGERU performs the rank 1 operation \(A := alpha * arrX * arrY **T + A\) where alpha is a scalar, arrX and arrY are n element vectors, and A is a (lda x n) matrix More...

__device__ void	multiplyComplexUpperMV (const int n, cuDoubleComplex x, const int lda, const cuDoubleComplex A)
	Performs the matrix-vector operation \(x_v:= A*x_v\). More...

__device__ void	complexGEMV (const int m, const int n, const int lda, const cuDoubleComplex alpha, const cuDoubleComplex A, const cuDoubleComplex arrX, cuDoubleComplex *arrY)
	Computes the matrix-vector operation \(alphaAx + y\) where alpha is a scalar, x and y are vectors and A is an m by n matrix. More...

__device__ void	getComplexLU (const int n, cuDoubleComplex __restrict__ A, int __restrict__ indPivot, int *__restrict__ info)
	Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges. More...

__device__ int	getComplexInverseLU (const int n, cuDoubleComplex __restrict__ A, const int __restrict__ indPivot, cuDoubleComplex *__restrict__ work)
	getComplexInverseLU computes the inverse of a matrix using the LU factorization computed by getHessenbergLU or getComplexLU. More...

__device__ void	getComplexInverse (const int n, cuDoubleComplex __restrict__ A, int __restrict__ ipiv, int __restrict__ info, cuDoubleComplex __restrict__ work)
	getComplexInverse computes the inverse of an a general matrix A using a LU factorization method More...

__device__ void	getHessenbergLU (const int n, cuDoubleComplex A, int __restrict__ indPivot, int *__restrict__ info)
	Computes the LU factorization of a (n x STRIDE) Hessenberg Matrix using partial pivoting with row interchanges. More...

__device__ void	getComplexInverseHessenberg (const int n, cuDoubleComplex __restrict__ A, int __restrict__ ipiv, int __restrict__ info, cuDoubleComplex __restrict__ work)
	getComplexInverseHessenberg computes the inverse of an upper Hessenberg matrix A using a LU factorization method More...

Detailed Description

Implementation of LU factorization of complex (variable-sized) matricies for CUDA.

Adapted from Lapack LU factorization and inversion routines

Author: Nick Curtis

Definition in file complexInverse.cu.

Function Documentation

◆ complexGEMV()

__device__ void complexGEMV	(	const int	m,
		const int	n,
		const int	lda,
		const cuDoubleComplex	alpha,
		const cuDoubleComplex *	A,
		const cuDoubleComplex *	arrX,
		cuDoubleComplex *	arrY
	)

Computes the matrix-vector operation \(alpha*A*x + y\) where alpha is a scalar, x and y are vectors and A is an m by n matrix.

Parameters

[in]	m	On entry, M specifies the number of rows of the matrix A. Must be >= 0
[out]	n	On entry, N specifies the number of columns of the matrix A. Must be >= 0
[in]	lda	The stride of the matrix

See also: STRIDE

Parameters

[in]	alpha	The scalar value
[in]	A	A is an array of dimension (lda, n). Before entry, the leading m by n part of the array A must contain the matrix of coefficients.
[in]	arrX	arrX is an array of dimension at least (n) Before entry, the incremented array arrX must contain the vector x.
[in,out]	arrY	arrY is an array of dimension at least (m). Before entry, the incremented array arrY must contain the vector y. On exit, arrY is overwritten by the updated vector y.

Note: These pointers cannot use the __restrict__ modifier, as they may overlap

Definition at line 176 of file complexInverse.cu.

◆ complexGERU()

__device__ void complexGERU	(	const int	n,
		const cuDoubleComplex	alpha,
		const cuDoubleComplex *	arrX,
		const cuDoubleComplex *	arrY,
		const int	incY,
		cuDoubleComplex *	A,
		const int	lda
	)

complexGERU performs the rank 1 operation \(A := alpha * arrX * arrY **T + A\) where alpha is a scalar, arrX and arrY are n element vectors, and A is a (lda x n) matrix

Parameters

[in]	n	The matrix/vector size
[in]	alpha	The value to scale by
[in]	arrX	arrX is an array of dimension at least n. Before entry, the incremented array arrX must contain the n element vector x.
[in]	arrY	arrY is an array of dimension at least 1 + (n - 1) * incY. Before entry, the incremented array arrY must contain the n element vector y.
[in]	incY	On entry, INCY specifies the increment for the elements of arrY. incY must not be zero.
[out]	A	A is an array of dimension (lda x n). Before entry, the leading n by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix.
[in]	lda	On entry, lda specifies the first dimension of A as declared in the calling (sub) program. lda must be at least max( 1, n ).

Definition at line 102 of file complexInverse.cu.

◆ getComplexInverse()

__device__ void getComplexInverse	(	const int	n,
		cuDoubleComplex *__restrict__	A,
		int *__restrict__	ipiv,
		int *__restrict__	info,
		cuDoubleComplex *__restrict__	work
	)

getComplexInverse computes the inverse of an a general matrix A using a LU factorization method

Parameters

[in]	n	The order of the matrix A. n >= 0.
[in,out]	A	The array, dimension (STRIDE, n)

See also: STRIDE

Parameters

[out]	ipiv	ipiv is an array of dimension (n). The pivot indices from getHessenbergLU; for 0<=i<=n-1, row i of the matrix was interchanged with row indPiv[i].
[out]	info	If not zero, an error occured during facotrization
[out]	work	A work array that will be overwritten

Definition at line 332 of file complexInverse.cu.

◆ getComplexInverseHessenberg()

__device__ void getComplexInverseHessenberg	(	const int	n,
		cuDoubleComplex *__restrict__	A,
		int *__restrict__	ipiv,
		int *__restrict__	info,
		cuDoubleComplex *__restrict__	work
	)

getComplexInverseHessenberg computes the inverse of an upper Hessenberg matrix A using a LU factorization method

Parameters

[in]	n	The order of the matrix A. n >= 0.
[in,out]	A	The array, dimension (STRIDE, n)

See also: STRIDE

Parameters

[out]	ipiv	ipiv is an array of dimension (n). The pivot indices from getHessenbergLU; for 0<=i<=n-1, row i of the matrix was interchanged with row indPiv[i].
[out]	info	If not zero, an error occured during factorization
[out]	work	A work array that will be overwritten

Definition at line 421 of file complexInverse.cu.

◆ getComplexInverseLU()

__device__ int getComplexInverseLU	(	const int	n,
		cuDoubleComplex *__restrict__	A,
		const int *__restrict__	indPivot,
		cuDoubleComplex *__restrict__	work
	)

getComplexInverseLU computes the inverse of a matrix using the LU factorization computed by getHessenbergLU or getComplexLU.

This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A).

Parameters

[in]	n	The order of the matrix A. n >= 0.
[in,out]	A	The array, dimension (STRIDE, n)

See also: STRIDE

Parameters

[in]	indPivot	indPivot is an array of dimension (n). The pivot indices from getHessenbergLU; for 0<=i<=n-1, row i of the matrix was interchanged with row indPiv[i].
[out]	work	A work array that will be overwritten

Definition at line 270 of file complexInverse.cu.

◆ getComplexLU()

__device__ void getComplexLU	(	const int	n,
		cuDoubleComplex *__restrict__	A,
		int *__restrict__	indPivot,
		int *__restrict__	info
	)

Computes the LU factorization of a (n x n) matrix using partial pivoting with row interchanges.

See also: STRIDE

Parameters

[in]	n	The matrix size
[in,out]	A	The matrix to factorize (n x n) with stride defined in solver_props.h

See also: STRIDE

Parameters

[out]	indPivot	indPivot is an array of dimension (n). The pivot indices from getHessenbergLU; for 0<=i<=n-1, row i of the matrix was interchanged with row indPiv[i]. &
[out]	info	An information variable

The factorization has the form: \(A = P * L * U\) where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

Definition at line 216 of file complexInverse.cu.

◆ getComplexMax()

__device__ int getComplexMax	(	const int	n,
		const cuDoubleComplex *__restrict__	complexArr
	)

getComplexMax finds the index of the first element having maximum absolute value.

Parameters

[in]	n	The size of complexArr
[in]	complexArr	The (nx1) vector to determine the maximum value of

Definition at line 22 of file complexInverse.cu.

◆ getHessenbergLU()

__device__ void getHessenbergLU	(	const int	n,
		cuDoubleComplex *	A,
		int *__restrict__	indPivot,
		int *__restrict__	info
	)

Computes the LU factorization of a (n x STRIDE) Hessenberg Matrix using partial pivoting with row interchanges.

See also: STRIDE

Parameters

[in]	n	The matrix size
[in,out]	A	The matrix to factorize (nxn) with stride defined in solver_props.h

See also: STRIDE

Parameters

[out]	indPivot	indPivot is an array of dimension (n). The pivot indices from getHessenbergLU; for 0<=i<=n-1, row i of the matrix was interchanged with row indPiv[i].
[out]	info	If not zero, an error occured during factorization

The factorization has the form: \(A = P * L * U\) where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). For full reference see: G. W. Stewart, Matrix Algorithms: Volume 1: Basic Decompositions, SIAM, Philadelphia, 1998. doi:10.1137/1.9781611971408.

Definition at line 367 of file complexInverse.cu.

◆ multiplyComplexUpperMV()

__device__ void multiplyComplexUpperMV	(	const int	n,
		cuDoubleComplex *	x,
		const int	lda,
		const cuDoubleComplex *	A
	)

Performs the matrix-vector operation \(x_v:= A*x_v\).

Parameters

[in]	n	On entry, n specifies the order of the matrix A. n must be at least zero.
[out]	x	x is an array of dimension at least (n). Before entry, the incremented array X must contain the n element vector \(x_v\). On exit, X is overwritten with the transformed vector \(x_v\).
[in]	lda	The stride of the matrix

See also: STRIDE

Parameters

[in] A A is an array of dimension (lda, n). Before entry the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.

Note: These pointers can't use the __restrict__ attribute, as they may overlap

Definition at line 138 of file complexInverse.cu.

◆ scaleComplex()

__device__ void scaleComplex	(	const int	n,
		const cuDoubleComplex	val,
		cuDoubleComplex *__restrict__	arrX
	)

scaleComplex scales a vector (with increment equal to one) by a constant val.

Parameters

[in]	n	The vector size
[out]	val	The value to scale by
[out]	arrX	The vector to scale

Definition at line 49 of file complexInverse.cu.

Functions

Detailed Description

Function Documentation

◆ complexGEMV()

◆ complexGERU()

◆ getComplexInverse()

◆ getComplexInverseHessenberg()

◆ getComplexInverseLU()

◆ getComplexLU()

◆ getComplexMax()

◆ getHessenbergLU()

◆ multiplyComplexUpperMV()

◆ scaleComplex()