diff --git a/src/ztrsm_LLN.jdf b/src/ztrsm_LLN.jdf index b4cbc025..5615824c 100644 --- a/src/ztrsm_LLN.jdf +++ b/src/ztrsm_LLN.jdf @@ -42,13 +42,39 @@ ztrsm(k,n) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, k), CL -> descB(k,n) -> (descB->mt>=(k+2)) ? D zgemm(k, (k+1)..(descB->mt-1), n) -BODY +BODY [type=HIP] { int tempkm = ((k)==(descB->mt-1)) ? (descB->m-(k*descB->mb)) : descB->mb; int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, k ); + int ldb = BLKLDD( descB, k ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {1., 0.}; + if(k == 0) { + lalpha.x = creal(alpha); lalpha.y = cimag(alpha); + } +#else dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempkm, tempnn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + +BODY +{ + int tempkm = ((k)==(descB->mt-1)) ? (descB->m-(k*descB->mb)) : descB->mb; + int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; int lda = BLKLDD( descA, k ); int ldb = BLKLDD( descB, k ); + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); CORE_ztrsm(side, uplo, trans, diag, tempkm, tempnn, lalpha, diff --git a/src/ztrsm_LLT.jdf b/src/ztrsm_LLT.jdf index ddd19154..8c8216e0 100644 --- a/src/ztrsm_LLT.jdf +++ b/src/ztrsm_LLT.jdf @@ -42,13 +42,39 @@ ztrsm(k,n) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, k), CL -> descB((descB->mt-1)-k,n) -> (descB->mt>=(2+k)) ? D zgemm(k, (k+1)..(descB->mt-1), n) -BODY +BODY [type=HIP] { int tempkm = ((k)==(0)) ? (descB->m-((descB->mt-1)*descB->mb)) : descB->mb; int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, (descB->mt-1)-k ); + int ldb = BLKLDD( descB, (descB->mt-1)-k ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {1., 0.}; + if(k == 0) { + lalpha.x = creal(alpha); lalpha.y = cimag(alpha); + } +#else dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempkm, tempnn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + +BODY +{ + int tempkm = ((k)==(0)) ? (descB->m-((descB->mt-1)*descB->mb)) : descB->mb; + int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; int lda = BLKLDD( descA, (descB->mt-1)-k ); int ldb = BLKLDD( descB, (descB->mt-1)-k ); + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); CORE_ztrsm(side, uplo, trans, diag, tempkm, tempnn, lalpha, diff --git a/src/ztrsm_LUN.jdf b/src/ztrsm_LUN.jdf index 6a0871f6..6a4885f3 100644 --- a/src/ztrsm_LUN.jdf +++ b/src/ztrsm_LUN.jdf @@ -42,13 +42,39 @@ ztrsm(k,n) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, k), CL -> descB((descB->mt-1)-k,n) -> (descB->mt>=(2+k)) ? D zgemm(k, (k+1)..(descB->mt-1), n) -BODY +BODY [type=HIP] { int tempkm = ((k)==(0)) ? (descB->m-((descB->mt-1)*descB->mb)) : descB->mb; int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, (descB->mt-1)-k ); + int ldb = BLKLDD( descB, (descB->mt-1)-k ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {1., 0.}; + if(k == 0) { + lalpha.x = creal(alpha); lalpha.y = cimag(alpha); + } +#else dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempkm, tempnn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + +BODY +{ + int tempkm = ((k)==(0)) ? (descB->m-((descB->mt-1)*descB->mb)) : descB->mb; + int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; int lda = BLKLDD( descA, (descB->mt-1)-k ); int ldb = BLKLDD( descB, (descB->mt-1)-k ); + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); CORE_ztrsm(side, uplo, trans, diag, tempkm, tempnn, lalpha, diff --git a/src/ztrsm_LUT.jdf b/src/ztrsm_LUT.jdf index eb8fc324..4d0edb56 100644 --- a/src/ztrsm_LUT.jdf +++ b/src/ztrsm_LUT.jdf @@ -42,13 +42,39 @@ ztrsm(k,n) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, k), CL -> descB(k,n) -> (descB->mt>=(k+2)) ? D zgemm(k, (k+1)..(descB->mt-1), n) -BODY +BODY [type=HIP] { int tempkm = ((k)==(descB->mt-1)) ? (descB->m-(k*descB->mb)) : descB->mb; int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, k ); + int ldb = BLKLDD( descB, k ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {1., 0.}; + if(k == 0) { + lalpha.x = creal(alpha); lalpha.y = cimag(alpha); + } +#else dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempkm, tempnn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + +BODY +{ + int tempkm = ((k)==(descB->mt-1)) ? (descB->m-(k*descB->mb)) : descB->mb; + int tempnn = ((n)==(descB->nt-1)) ? (descB->n-(n*descB->nb)) : descB->nb; int lda = BLKLDD( descA, k ); int ldb = BLKLDD( descB, k ); + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); CORE_ztrsm(side, uplo, trans, diag, tempkm, tempnn, lalpha, diff --git a/src/ztrsm_RLN.jdf b/src/ztrsm_RLN.jdf index 8c00d7c7..dc7fee47 100644 --- a/src/ztrsm_RLN.jdf +++ b/src/ztrsm_RLN.jdf @@ -42,13 +42,39 @@ ztrsm(k,m) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, m), CL -> (descB->nt>=(2+k)) ? C zgemm(k, m, (k+1)..(descB->nt-1)) -> descB(m,(descB->nt-1)-k) +BODY [type=HIP] +{ + int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; + int tempkn = ((k)==(0)) ? (descB->n-((descB->nt-1)*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, (descB->nt-1)-k ); + int ldb = BLKLDD( descB, m ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {1., 0.}; + if(k == 0) { + lalpha.x = creal(alpha); lalpha.y = cimag(alpha); + } +#else + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempmm, tempkn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + BODY { int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; int tempkn = ((k)==(0)) ? (descB->n-((descB->nt-1)*descB->nb)) : descB->nb; - dplasma_complex64_t lalpha = (k==0) ? alpha : (dplasma_complex64_t)1.0; int lda = BLKLDD( descA, (descB->nt-1)-k ); int ldb = BLKLDD( descB, m ); + dplasma_complex64_t lalpha = (k==0) ? alpha : (dplasma_complex64_t)1.0; CORE_ztrsm(side, uplo, trans, diag, tempmm, tempkn, lalpha, diff --git a/src/ztrsm_RLT.jdf b/src/ztrsm_RLT.jdf index 42f19639..e4a63ef8 100644 --- a/src/ztrsm_RLT.jdf +++ b/src/ztrsm_RLT.jdf @@ -42,6 +42,29 @@ ztrsm(k,m) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, m), CL -> (descB->nt>=(k+2)) ? C zgemm(k, m, (k+1) .. (descB->nt-1)) -> descB(m,k) +BODY [type=HIP] +{ + int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; + int tempkn = ((k)==(descB->nt-1)) ? (descB->n-(k*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, k ); + int ldb = BLKLDD( descB, m ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {creal(alpha), cimag(alpha)}; +#else + dplasma_complex64_t lalpha = alpha; +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempmm, tempkn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + BODY { int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; diff --git a/src/ztrsm_RUN.jdf b/src/ztrsm_RUN.jdf index 1031922d..95743111 100644 --- a/src/ztrsm_RUN.jdf +++ b/src/ztrsm_RUN.jdf @@ -42,13 +42,39 @@ ztrsm(k,m) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, m), CL -> (descB->nt>=(k+2)) ? C zgemm(k, m, (k+1) .. (descB->nt-1)) -> descB(m,k) +BODY [type=HIP] +{ + int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; + int tempkn = ((k)==(descB->nt-1)) ? (descB->n-(k*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, k ); + int ldb = BLKLDD( descB, m ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {1., 0.}; + if(k == 0) { + lalpha.x = creal(alpha); lalpha.y = cimag(alpha); + } +#else + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)(1.0); +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempmm, tempkn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + BODY { int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; int tempkn = ((k)==(descB->nt-1)) ? (descB->n-(k*descB->nb)) : descB->nb; - dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)1.0; int lda = BLKLDD( descA, k ); int ldb = BLKLDD( descB, m ); + dplasma_complex64_t lalpha = ((k)==(0)) ? (alpha) : (dplasma_complex64_t)1.0; CORE_ztrsm(side, uplo, trans, diag, tempmm, tempkn, lalpha, diff --git a/src/ztrsm_RUT.jdf b/src/ztrsm_RUT.jdf index 4d2ec1a1..d9189b73 100644 --- a/src/ztrsm_RUT.jdf +++ b/src/ztrsm_RUT.jdf @@ -42,6 +42,29 @@ ztrsm(k,m) [ flops = inline_c %{ return FLOPS_ZTRSM(side, CLEAN_MB(descB, m), CL -> (descB->nt>=(2+k)) ? C zgemm(k, m, (k+1)..(descB->nt-1)) -> descB(m,(descB->nt-1)-k) +BODY [type=HIP] +{ + int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb; + int tempkn = ((k)==(0)) ? (descB->n-((descB->nt-1)*descB->nb)) : descB->nb; + int lda = BLKLDD( descA, (descB->nt-1)-k ); + int ldb = BLKLDD( descB, m ); +#if defined(PRECISION_z) || defined(PRECISION_c) + hipblasDoubleComplex lalpha = {creal(alpha), cimag(alpha)}; +#else + dplasma_complex64_t lalpha = alpha; +#endif + + hipblasStatus_t status; + dplasma_hip_handles_t *handles = parsec_info_get(&gpu_stream->infos, hip_handles_infokey); + assert(NULL != handles); + status = hipblasZtrsm( handles->hipblas_handle, dplasma_hipblas_side(side), dplasma_hipblas_fill(uplo), dplasma_hipblas_op(trans), dplasma_hipblas_diag(diag), + tempmm, tempkn, &lalpha, + (hipblasDoubleComplex*)A, lda, + (hipblasDoubleComplex*)B, ldb); + DPLASMA_HIPBLAS_CHECK_ERROR( "hipblasZtrsm ", status, {return PARSEC_HOOK_RETURN_ERROR;} ); +} +END + BODY { int tempmm = ((m)==(descB->mt-1)) ? (descB->m-(m*descB->mb)) : descB->mb;