![]() |
MAGMA
2.7.0
Matrix Algebra for GPU and Multicore Architectures
|
Functions | |
| void | magmablas_cgetmatrix_transpose (magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloatComplex_const_ptr dAT, magma_int_t ldda, magmaFloatComplex *hA, magma_int_t lda, magmaFloatComplex_ptr dwork, magma_int_t lddw, magma_queue_t queues[2]) |
| Copy and transpose matrix dAT on GPU device to hA on CPU host. More... | |
| void | magmablas_cgetmatrix_transpose_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloatComplex_const_ptr const dAT[], magma_int_t ldda, magmaFloatComplex *hA, magma_int_t lda, magmaFloatComplex_ptr dwork[], magma_int_t lddw, magma_queue_t queues[][2]) |
| Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host. More... | |
| void | magmablas_dgetmatrix_transpose (magma_int_t m, magma_int_t n, magma_int_t nb, magmaDouble_const_ptr dAT, magma_int_t ldda, double *hA, magma_int_t lda, magmaDouble_ptr dwork, magma_int_t lddw, magma_queue_t queues[2]) |
| Copy and transpose matrix dAT on GPU device to hA on CPU host. More... | |
| void | magmablas_dgetmatrix_transpose_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaDouble_const_ptr const dAT[], magma_int_t ldda, double *hA, magma_int_t lda, magmaDouble_ptr dwork[], magma_int_t lddw, magma_queue_t queues[][2]) |
| Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host. More... | |
| void | magmablas_sgetmatrix_transpose (magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloat_const_ptr dAT, magma_int_t ldda, float *hA, magma_int_t lda, magmaFloat_ptr dwork, magma_int_t lddw, magma_queue_t queues[2]) |
| Copy and transpose matrix dAT on GPU device to hA on CPU host. More... | |
| void | magmablas_sgetmatrix_transpose_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloat_const_ptr const dAT[], magma_int_t ldda, float *hA, magma_int_t lda, magmaFloat_ptr dwork[], magma_int_t lddw, magma_queue_t queues[][2]) |
| Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host. More... | |
| void | magmablas_zgetmatrix_transpose (magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex_const_ptr dAT, magma_int_t ldda, magmaDoubleComplex *hA, magma_int_t lda, magmaDoubleComplex_ptr dwork, magma_int_t lddw, magma_queue_t queues[2]) |
| Copy and transpose matrix dAT on GPU device to hA on CPU host. More... | |
| void | magmablas_zgetmatrix_transpose_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex_const_ptr const dAT[], magma_int_t ldda, magmaDoubleComplex *hA, magma_int_t lda, magmaDoubleComplex_ptr dwork[], magma_int_t lddw, magma_queue_t queues[][2]) |
| Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host. More... | |
| void magmablas_cgetmatrix_transpose | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaFloatComplex_const_ptr | dAT, | ||
| magma_int_t | ldda, | ||
| magmaFloatComplex * | hA, | ||
| magma_int_t | lda, | ||
| magmaFloatComplex_ptr | dwork, | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[2] | ||
| ) |
Copy and transpose matrix dAT on GPU device to hA on CPU host.
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | The n-by-m matrix A^T on the GPU, of dimension (ldda,m). |
| [in] | ldda | Leading dimension of matrix dAT. ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Workspace on the GPU, of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | Array of two queues, to pipeline operation. |
| void magmablas_cgetmatrix_transpose_mgpu | ( | magma_int_t | ngpu, |
| magma_int_t | m, | ||
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaFloatComplex_const_ptr const | dAT[], | ||
| magma_int_t | ldda, | ||
| magmaFloatComplex * | hA, | ||
| magma_int_t | lda, | ||
| magmaFloatComplex_ptr | dwork[], | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[][2] | ||
| ) |
Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host.
| [in] | ngpu | Number of GPUs over which dAT is distributed. |
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | Array of ngpu pointers, one per GPU, that store the distributed n-by-m matrix A^T on the GPUs, each of dimension (ldda,m). |
| [in] | ldda | Leading dimension of each matrix dAT on each GPU. ngpu*ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Array of ngpu pointers, one per GPU, that store the workspaces on each GPU, each of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | 2D array of dimension (ngpu,2), with two queues per GPU. |
| void magmablas_dgetmatrix_transpose | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaDouble_const_ptr | dAT, | ||
| magma_int_t | ldda, | ||
| double * | hA, | ||
| magma_int_t | lda, | ||
| magmaDouble_ptr | dwork, | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[2] | ||
| ) |
Copy and transpose matrix dAT on GPU device to hA on CPU host.
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | The n-by-m matrix A^T on the GPU, of dimension (ldda,m). |
| [in] | ldda | Leading dimension of matrix dAT. ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Workspace on the GPU, of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | Array of two queues, to pipeline operation. |
| void magmablas_dgetmatrix_transpose_mgpu | ( | magma_int_t | ngpu, |
| magma_int_t | m, | ||
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaDouble_const_ptr const | dAT[], | ||
| magma_int_t | ldda, | ||
| double * | hA, | ||
| magma_int_t | lda, | ||
| magmaDouble_ptr | dwork[], | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[][2] | ||
| ) |
Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host.
| [in] | ngpu | Number of GPUs over which dAT is distributed. |
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | Array of ngpu pointers, one per GPU, that store the distributed n-by-m matrix A^T on the GPUs, each of dimension (ldda,m). |
| [in] | ldda | Leading dimension of each matrix dAT on each GPU. ngpu*ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Array of ngpu pointers, one per GPU, that store the workspaces on each GPU, each of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | 2D array of dimension (ngpu,2), with two queues per GPU. |
| void magmablas_sgetmatrix_transpose | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaFloat_const_ptr | dAT, | ||
| magma_int_t | ldda, | ||
| float * | hA, | ||
| magma_int_t | lda, | ||
| magmaFloat_ptr | dwork, | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[2] | ||
| ) |
Copy and transpose matrix dAT on GPU device to hA on CPU host.
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | The n-by-m matrix A^T on the GPU, of dimension (ldda,m). |
| [in] | ldda | Leading dimension of matrix dAT. ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Workspace on the GPU, of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | Array of two queues, to pipeline operation. |
| void magmablas_sgetmatrix_transpose_mgpu | ( | magma_int_t | ngpu, |
| magma_int_t | m, | ||
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaFloat_const_ptr const | dAT[], | ||
| magma_int_t | ldda, | ||
| float * | hA, | ||
| magma_int_t | lda, | ||
| magmaFloat_ptr | dwork[], | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[][2] | ||
| ) |
Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host.
| [in] | ngpu | Number of GPUs over which dAT is distributed. |
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | Array of ngpu pointers, one per GPU, that store the distributed n-by-m matrix A^T on the GPUs, each of dimension (ldda,m). |
| [in] | ldda | Leading dimension of each matrix dAT on each GPU. ngpu*ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Array of ngpu pointers, one per GPU, that store the workspaces on each GPU, each of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | 2D array of dimension (ngpu,2), with two queues per GPU. |
| void magmablas_zgetmatrix_transpose | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaDoubleComplex_const_ptr | dAT, | ||
| magma_int_t | ldda, | ||
| magmaDoubleComplex * | hA, | ||
| magma_int_t | lda, | ||
| magmaDoubleComplex_ptr | dwork, | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[2] | ||
| ) |
Copy and transpose matrix dAT on GPU device to hA on CPU host.
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | The n-by-m matrix A^T on the GPU, of dimension (ldda,m). |
| [in] | ldda | Leading dimension of matrix dAT. ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Workspace on the GPU, of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | Array of two queues, to pipeline operation. |
| void magmablas_zgetmatrix_transpose_mgpu | ( | magma_int_t | ngpu, |
| magma_int_t | m, | ||
| magma_int_t | n, | ||
| magma_int_t | nb, | ||
| magmaDoubleComplex_const_ptr const | dAT[], | ||
| magma_int_t | ldda, | ||
| magmaDoubleComplex * | hA, | ||
| magma_int_t | lda, | ||
| magmaDoubleComplex_ptr | dwork[], | ||
| magma_int_t | lddw, | ||
| magma_queue_t | queues[][2] | ||
| ) |
Copy and transpose matrix dAT, which is distributed row block cyclic over multiple GPUs, to hA on CPU host.
| [in] | ngpu | Number of GPUs over which dAT is distributed. |
| [in] | m | Number of rows of output matrix hA. m >= 0. |
| [in] | n | Number of columns of output matrix hA. n >= 0. |
| [in] | nb | Block size. nb >= 0. |
| [in] | dAT | Array of ngpu pointers, one per GPU, that store the distributed n-by-m matrix A^T on the GPUs, each of dimension (ldda,m). |
| [in] | ldda | Leading dimension of each matrix dAT on each GPU. ngpu*ldda >= n. |
| [out] | hA | The m-by-n matrix A on the CPU, of dimension (lda,n). |
| [in] | lda | Leading dimension of matrix hA. lda >= m. |
| [out] | dwork | Array of ngpu pointers, one per GPU, that store the workspaces on each GPU, each of dimension (2*lddw*nb). |
| [in] | lddw | Leading dimension of dwork. lddw >= m. |
| [in] | queues | 2D array of dimension (ngpu,2), with two queues per GPU. |