164c682b65cd04ac83b51251b40dca14423df351aTim Murray/* 264c682b65cd04ac83b51251b40dca14423df351aTim Murray * Copyright (C) 2012 The Android Open Source Project 364c682b65cd04ac83b51251b40dca14423df351aTim Murray * 464c682b65cd04ac83b51251b40dca14423df351aTim Murray * Licensed under the Apache License, Version 2.0 (the "License"); 564c682b65cd04ac83b51251b40dca14423df351aTim Murray * you may not use this file except in compliance with the License. 664c682b65cd04ac83b51251b40dca14423df351aTim Murray * You may obtain a copy of the License at 764c682b65cd04ac83b51251b40dca14423df351aTim Murray * 864c682b65cd04ac83b51251b40dca14423df351aTim Murray * http://www.apache.org/licenses/LICENSE-2.0 964c682b65cd04ac83b51251b40dca14423df351aTim Murray * 1064c682b65cd04ac83b51251b40dca14423df351aTim Murray * Unless required by applicable law or agreed to in writing, software 1164c682b65cd04ac83b51251b40dca14423df351aTim Murray * distributed under the License is distributed on an "AS IS" BASIS, 1264c682b65cd04ac83b51251b40dca14423df351aTim Murray * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1364c682b65cd04ac83b51251b40dca14423df351aTim Murray * See the License for the specific language governing permissions and 1464c682b65cd04ac83b51251b40dca14423df351aTim Murray * limitations under the License. 1564c682b65cd04ac83b51251b40dca14423df351aTim Murray */ 1664c682b65cd04ac83b51251b40dca14423df351aTim Murray 1764c682b65cd04ac83b51251b40dca14423df351aTim Murray 1864c682b65cd04ac83b51251b40dca14423df351aTim Murray#include "rsCpuIntrinsic.h" 1964c682b65cd04ac83b51251b40dca14423df351aTim Murray#include "rsCpuIntrinsicInlines.h" 20e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#include "rsCpuBLASDispatch.h" 2199d0e8130f5b4bb83d1a68d96496fa558e35193aMiao Wang#include "eight_bit_int_gemm.h" 2264c682b65cd04ac83b51251b40dca14423df351aTim Murray 2364c682b65cd04ac83b51251b40dca14423df351aTim Murrayusing namespace android; 2464c682b65cd04ac83b51251b40dca14423df351aTim Murrayusing namespace android::renderscript; 2564c682b65cd04ac83b51251b40dca14423df351aTim Murray 2664c682b65cd04ac83b51251b40dca14423df351aTim Murraynamespace android { 2764c682b65cd04ac83b51251b40dca14423df351aTim Murraynamespace renderscript { 2864c682b65cd04ac83b51251b40dca14423df351aTim Murray 2964c682b65cd04ac83b51251b40dca14423df351aTim Murray 3064c682b65cd04ac83b51251b40dca14423df351aTim Murrayclass RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic { 3164c682b65cd04ac83b51251b40dca14423df351aTim Murraypublic: 32c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void invokeForEach(uint32_t slot, 33c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines const Allocation ** ain, 34c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines uint32_t inLen, 35c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines Allocation * aout, 36c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines const void * usr, 37c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines uint32_t usrLen, 38c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines const RsScriptCall *sc) override; 3964c682b65cd04ac83b51251b40dca14423df351aTim Murray 40c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void populateScript(Script *) override; 41c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines ~RsdCpuScriptIntrinsicBLAS() override; 4264c682b65cd04ac83b51251b40dca14423df351aTim Murray RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s); 4364c682b65cd04ac83b51251b40dca14423df351aTim Murray 4464c682b65cd04ac83b51251b40dca14423df351aTim Murrayprotected: 4564c682b65cd04ac83b51251b40dca14423df351aTim Murray 46aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray uint8_t a_offset = 0; 47aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray uint8_t b_offset = 0; 48aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray uint8_t c_offset = 0; 49aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 50e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#ifdef RS_COMPATIBILITY_LIB 51e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang bool isBlasLibInitialized = false; 52e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#endif 532b999883f2f390ee43ed18317d77c810a0c6657bTim Murray static void kernelBNNM(size_t m, size_t n, size_t k, 5406deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang const uint8_t* a, uint8_t a_offset, size_t lda, 5506deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang const uint8_t* b, uint8_t b_offset, size_t ldb, 5606deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang uint8_t* c, int32_t c_offset, size_t ldc, 5706deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang int32_t c_mult_int); 58aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 59aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 6064c682b65cd04ac83b51251b40dca14423df351aTim Murray 6164c682b65cd04ac83b51251b40dca14423df351aTim Murray}; 6264c682b65cd04ac83b51251b40dca14423df351aTim Murray 6364c682b65cd04ac83b51251b40dca14423df351aTim Murray} 6464c682b65cd04ac83b51251b40dca14423df351aTim Murray} 6564c682b65cd04ac83b51251b40dca14423df351aTim Murray 6664c682b65cd04ac83b51251b40dca14423df351aTim Murrayvoid RsdCpuScriptIntrinsicBLAS::populateScript(Script *s) { 6764c682b65cd04ac83b51251b40dca14423df351aTim Murray s->mHal.info.exportedVariableCount = 0; 6864c682b65cd04ac83b51251b40dca14423df351aTim Murray} 6964c682b65cd04ac83b51251b40dca14423df351aTim Murray 7064c682b65cd04ac83b51251b40dca14423df351aTim Murraystatic void initABC(const Allocation ** ain, 7164c682b65cd04ac83b51251b40dca14423df351aTim Murray size_t size, 7264c682b65cd04ac83b51251b40dca14423df351aTim Murray void** A, 7364c682b65cd04ac83b51251b40dca14423df351aTim Murray void** B, 7464c682b65cd04ac83b51251b40dca14423df351aTim Murray void** C, 7564c682b65cd04ac83b51251b40dca14423df351aTim Murray int* lda, 7664c682b65cd04ac83b51251b40dca14423df351aTim Murray int* ldb, 7764c682b65cd04ac83b51251b40dca14423df351aTim Murray int* ldc) 7864c682b65cd04ac83b51251b40dca14423df351aTim Murray{ 7964c682b65cd04ac83b51251b40dca14423df351aTim Murray if (ain[0]) { 8064c682b65cd04ac83b51251b40dca14423df351aTim Murray *A = ain[0]->mHal.drvState.lod[0].mallocPtr; 8164c682b65cd04ac83b51251b40dca14423df351aTim Murray *lda = (int)(ain[0]->mHal.drvState.lod[0].stride/size); 8264c682b65cd04ac83b51251b40dca14423df351aTim Murray } 8364c682b65cd04ac83b51251b40dca14423df351aTim Murray if (ain[1]) { 8464c682b65cd04ac83b51251b40dca14423df351aTim Murray *B = ain[1]->mHal.drvState.lod[0].mallocPtr; 8564c682b65cd04ac83b51251b40dca14423df351aTim Murray *ldb = (int)(ain[1]->mHal.drvState.lod[0].stride/size); 8664c682b65cd04ac83b51251b40dca14423df351aTim Murray } 8764c682b65cd04ac83b51251b40dca14423df351aTim Murray if (ain[2]) { 8864c682b65cd04ac83b51251b40dca14423df351aTim Murray *C = ain[2]->mHal.drvState.lod[0].mallocPtr; 8964c682b65cd04ac83b51251b40dca14423df351aTim Murray *ldc = (int)(ain[2]->mHal.drvState.lod[0].stride/size); 9064c682b65cd04ac83b51251b40dca14423df351aTim Murray } 9164c682b65cd04ac83b51251b40dca14423df351aTim Murray 9264c682b65cd04ac83b51251b40dca14423df351aTim Murray 9364c682b65cd04ac83b51251b40dca14423df351aTim Murray} 9464c682b65cd04ac83b51251b40dca14423df351aTim Murray 9564c682b65cd04ac83b51251b40dca14423df351aTim Murrayvoid RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot, 9664c682b65cd04ac83b51251b40dca14423df351aTim Murray const Allocation ** ain, 9764c682b65cd04ac83b51251b40dca14423df351aTim Murray uint32_t inLen, 9864c682b65cd04ac83b51251b40dca14423df351aTim Murray Allocation * aout, 9964c682b65cd04ac83b51251b40dca14423df351aTim Murray const void * usr, 10064c682b65cd04ac83b51251b40dca14423df351aTim Murray uint32_t usrLen, 10164c682b65cd04ac83b51251b40dca14423df351aTim Murray const RsScriptCall *sc) { 10264c682b65cd04ac83b51251b40dca14423df351aTim Murray RsBlasCall* call = (RsBlasCall*) usr; 10364c682b65cd04ac83b51251b40dca14423df351aTim Murray // setup BLAS enum args 10464c682b65cd04ac83b51251b40dca14423df351aTim Murray enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA; 10564c682b65cd04ac83b51251b40dca14423df351aTim Murray enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB; 10664c682b65cd04ac83b51251b40dca14423df351aTim Murray enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo; 10764c682b65cd04ac83b51251b40dca14423df351aTim Murray enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag; 10864c682b65cd04ac83b51251b40dca14423df351aTim Murray enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side; 10964c682b65cd04ac83b51251b40dca14423df351aTim Murray 11064c682b65cd04ac83b51251b40dca14423df351aTim Murray void *A = nullptr; 11164c682b65cd04ac83b51251b40dca14423df351aTim Murray void *B = nullptr; 11264c682b65cd04ac83b51251b40dca14423df351aTim Murray void *C = nullptr; 11364c682b65cd04ac83b51251b40dca14423df351aTim Murray void *X = nullptr; 11464c682b65cd04ac83b51251b40dca14423df351aTim Murray void *Y = nullptr; 11564c682b65cd04ac83b51251b40dca14423df351aTim Murray 11664c682b65cd04ac83b51251b40dca14423df351aTim Murray int lda = 0, ldb = 0, ldc = 0; 11764c682b65cd04ac83b51251b40dca14423df351aTim Murray 118e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#ifdef RS_COMPATIBILITY_LIB 119e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang // Allow BNNM even without libblas 120e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang if (call->func != RsBlas_bnnm && !isBlasLibInitialized) { 121e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang if (!loadBLASLib()) { 122e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang ALOGE("Failed to load the BLAS lib, IntrinsicBLAS NOT supported!\n"); 123e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang return; 124e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang } 125e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang isBlasLibInitialized = true; 126e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang } 127e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#endif 128e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang 12964c682b65cd04ac83b51251b40dca14423df351aTim Murray switch (call->func) { 13064c682b65cd04ac83b51251b40dca14423df351aTim Murray 13164c682b65cd04ac83b51251b40dca14423df351aTim Murray // Level 1 BLAS: returns into a 1D Allocation 13264c682b65cd04ac83b51251b40dca14423df351aTim Murray 13364c682b65cd04ac83b51251b40dca14423df351aTim Murray 13464c682b65cd04ac83b51251b40dca14423df351aTim Murray // Level 2 BLAS 13564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sgemv): 136b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 13764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A, 13864c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY); 13964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 14064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sgbmv): 141b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 14264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 14364c682b65cd04ac83b51251b40dca14423df351aTim Murray call->alpha.f, (float*)A, lda, (float*)X, call->incX, 14464c682b65cd04ac83b51251b40dca14423df351aTim Murray call->beta.f, (float*)Y, call->incY); 14564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 14664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_strmv): 14764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 14864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, 14964c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (float*)X, call->incX); 15064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 15164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_stbmv): 15264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 15364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A, 15464c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (float*)X, call->incX); 15564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 15664c682b65cd04ac83b51251b40dca14423df351aTim Murray // stpmv takes a packed 1D Allocation only 15764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_stpmv): 15864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 15964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, 16064c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)X, call->incX); 16164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 16264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_strsv): 16364c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 16464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda, 16564c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)X, call->incX); 16664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 16764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_stbsv): 16864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 16964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A, 17064c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (float*)X, call->incX); 17164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 17264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_stpsv): 17364c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 17464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, 17564c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)X, call->incX); 17664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 17764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dgemv): 178b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 17964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A, 18064c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY); 18164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 18264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dgbmv): 183b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 18464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 18564c682b65cd04ac83b51251b40dca14423df351aTim Murray call->alpha.d, (double*)A, lda, (double*)X, call->incX, 18664c682b65cd04ac83b51251b40dca14423df351aTim Murray call->beta.d, (double*)Y, call->incY); 18764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 18864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtrmv): 18964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 19064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, 19164c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (double*)X, call->incX); 19264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 19364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtbmv): 19464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 19564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A, 19664c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (double*)X, call->incX); 19764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 19864c682b65cd04ac83b51251b40dca14423df351aTim Murray // stpmv takes a packed 1D Allocation only 19964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtpmv): 20064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 20164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, 20264c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)X, call->incX); 20364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 20464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtrsv): 20564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 20664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda, 20764c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)X, call->incX); 20864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 20964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtbsv): 21064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 21164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A, 21264c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (double*)X, call->incX); 21364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 21464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtpsv): 21564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 21664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, 21764c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)X, call->incX); 21864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 21964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cgemv): 220b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 22164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A, 22264c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY); 22364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 22464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cgbmv): 225b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 22664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 22764c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX, 22864c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)&call->beta.c, (void*)Y, call->incY); 22964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 23064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctrmv): 23164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 23264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 23364c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX); 23464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 23564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctbmv): 23664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 23764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 23864c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX); 23964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 24064c682b65cd04ac83b51251b40dca14423df351aTim Murray // stpmv takes a packed 1D Allocation only 24164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctpmv): 24264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 24364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 24464c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)X, call->incX); 24564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 24664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctrsv): 24764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 24864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda, 24964c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)X, call->incX); 25064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 25164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctbsv): 25264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 25364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 25464c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX); 25564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 25664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctpsv): 25764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 25864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 25964c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)X, call->incX); 26064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 26164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zgemv): 262b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 26364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A, 26464c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY); 26564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 26664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zgbmv): 267b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 26864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 26964c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX, 27064c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)&call->beta.z, (void*)Y, call->incY); 27164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 27264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztrmv): 27364c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 27464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 27564c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX); 27664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 27764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztbmv): 27864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 27964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 28064c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX); 28164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 28264c682b65cd04ac83b51251b40dca14423df351aTim Murray // stpmv takes a packed 1D Allocation only 28364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztpmv): 28464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 28564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 28664c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)X, call->incX); 28764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 28864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztrsv): 28964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 29064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda, 29164c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)X, call->incX); 29264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 29364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztbsv): 29464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 29564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 29664c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)X, call->incX); 29764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 29864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztpsv): 29964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 30064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 30164c682b65cd04ac83b51251b40dca14423df351aTim Murray (void*)X, call->incX); 30264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 30364c682b65cd04ac83b51251b40dca14423df351aTim Murray 30464c682b65cd04ac83b51251b40dca14423df351aTim Murray 30564c682b65cd04ac83b51251b40dca14423df351aTim Murray // S and D only 30664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssymv): 30764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 30864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda, 30964c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)X, call->incX, call->beta.f, (float*)Y, call->incY); 31064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 31164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssbmv): 31264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 31364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f, 31464c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)A, lda, (float*)X, call->incX, call->beta.f, 31564c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)Y, call->incY); 31664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 31764c682b65cd04ac83b51251b40dca14423df351aTim Murray //sspmv requires a packed 1D Allocation 31864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sspmv): 31964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 32064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, 32164c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)X, call->incX, call->beta.f, (float*)Y, call->incY); 32264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 32364c682b65cd04ac83b51251b40dca14423df351aTim Murray // following calls have init reordered because A is output matrix 32464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sger): 32564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda); 32664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X, 32764c682b65cd04ac83b51251b40dca14423df351aTim Murray call->incX, (float*)Y, call->incY, (float*)A, lda); 32864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 32964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssyr): 33064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr); 33164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 33264c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)A, lda); 33364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 33464c682b65cd04ac83b51251b40dca14423df351aTim Murray // sspr is packed 1D Allocation A only 33564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sspr): 33664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr); 33764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 33864c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)A); 33964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 34064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssyr2): 34164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda); 34264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 34364c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)Y, call->incY, (float*)A, lda); 34464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 34564c682b65cd04ac83b51251b40dca14423df351aTim Murray // sspr2 is packed 1D Allocation A only 34664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sspr2): 34764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda); 34864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 34964c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)Y, call->incY, (float*)A); 35064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 35164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsymv): 35264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 35364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda, 35464c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)X, call->incX, call->beta.d, (double*)Y, call->incY); 35564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 35664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsbmv): 35764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 35864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d, 35964c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)A, lda, (double*)X, call->incX, call->beta.d, 36064c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)Y, call->incY); 36164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 36264c682b65cd04ac83b51251b40dca14423df351aTim Murray // dspmv requires a packed 1D Allocation 36364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dspmv): 36464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 36564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, 36664c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)X, call->incX, call->beta.d, (double*)Y, call->incY); 36764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 36864c682b65cd04ac83b51251b40dca14423df351aTim Murray // following calls have init reordered because A is output matrix 36964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dger): 37064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda); 37164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X, 37264c682b65cd04ac83b51251b40dca14423df351aTim Murray call->incX, (double*)Y, call->incY, (double*)A, lda); 37364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 37464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsyr): 37564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr); 37664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 37764c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)A, lda); 37864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 37964c682b65cd04ac83b51251b40dca14423df351aTim Murray // dspr is packed 1D Allocation A only 38064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dspr): 38164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr); 38264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 38364c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)A); 38464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 38564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsyr2): 38664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda); 38764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 38864c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)Y, call->incY, (double*)A, lda); 38964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 39064c682b65cd04ac83b51251b40dca14423df351aTim Murray // dspr2 is packed 1D Allocation A only 39164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dspr2): 39264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda); 39364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 39464c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)Y, call->incY, (double*)A); 39564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 39664c682b65cd04ac83b51251b40dca14423df351aTim Murray 39764c682b65cd04ac83b51251b40dca14423df351aTim Murray // C and Z only 39864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_chemv): 39964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 40064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda, 40164c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, (void*)&call->beta.c, Y, call->incY); 40264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 40364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_chbmv): 40464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 40564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c, 40664c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY); 40764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 40864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_chpmv): 40964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 41064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, 41164c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, (void*)&call->beta.c, Y, call->incY); 41264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 41364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cgeru): 41464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 41564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c, 41664c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, Y, call->incY, A, lda); 41764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 41864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cgerc): 41964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 42064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c, 42164c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, Y, call->incY, A, lda); 42264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 42364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cher): 42408ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda); 42564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f, 42664c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, A, lda); 42764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 42864c682b65cd04ac83b51251b40dca14423df351aTim Murray // packed 1D Allocations only 42964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_chpr): 43008ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda); 43164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X, 43264c682b65cd04ac83b51251b40dca14423df351aTim Murray call->incX, A); 43364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 43464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cher2): 43564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 43664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, 43764c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, Y, call->incY, A, lda); 43864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 43964c682b65cd04ac83b51251b40dca14423df351aTim Murray // packed 1D Allocations only 44064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_chpr2): 44164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 44264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X, 44364c682b65cd04ac83b51251b40dca14423df351aTim Murray call->incX, Y, call->incY, A); 44464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 44564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zhemv): 44664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 44764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda, 44864c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, (void*)&call->beta.z, Y, call->incY); 44964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 45064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zhbmv): 45164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 45264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z, 45364c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY); 45464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 45564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zhpmv): 45664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 45764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, 45864c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, (void*)&call->beta.z, Y, call->incY); 45964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 46064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zgeru): 46164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 46264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z, 46364c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, Y, call->incY, A, lda); 46464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 46564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zgerc): 46664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 46764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z, 46864c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, Y, call->incY, A, lda); 46964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 47064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zher): 47108ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda); 47264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d, 47364c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, A, lda); 47464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 47564c682b65cd04ac83b51251b40dca14423df351aTim Murray // packed 1D Allocations only 47664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zhpr): 47708ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda); 47864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X, 47964c682b65cd04ac83b51251b40dca14423df351aTim Murray call->incX, A); 48064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 48164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zher2): 48264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 48364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, 48464c682b65cd04ac83b51251b40dca14423df351aTim Murray X, call->incX, Y, call->incY, A, lda); 48564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 48664c682b65cd04ac83b51251b40dca14423df351aTim Murray // packed 1D Allocations only 48764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zhpr2): 48864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 48964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X, 49064c682b65cd04ac83b51251b40dca14423df351aTim Murray call->incX, Y, call->incY, A); 49164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 49264c682b65cd04ac83b51251b40dca14423df351aTim Murray 49364c682b65cd04ac83b51251b40dca14423df351aTim Murray // Level 3 BLAS 49464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_sgemm): 49564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc); 49664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f, 49764c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc); 49864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 49964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssymm): 50064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc); 50164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A, 50264c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (float*)B, ldb, call->beta.f, (float*)C, ldc); 50364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 50464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssyrk): 50564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc); 50664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A, 50764c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, call->beta.f, (float*)C, ldc); 50864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 50964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ssyr2k): 51064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc); 51164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A, 51264c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (float*)B, ldb, call->beta.f, (float*)C, ldc); 51364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 51464c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_strmm): 51564c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr); 51664c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f, 51764c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)A, lda, (float*)B, ldb); 51864c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 51964c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_strsm): 52064c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr); 52164c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f, 52264c682b65cd04ac83b51251b40dca14423df351aTim Murray (float*)A, lda, (float*)B, ldb); 52364c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 52464c682b65cd04ac83b51251b40dca14423df351aTim Murray 52564c682b65cd04ac83b51251b40dca14423df351aTim Murray 52664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dgemm): 52764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc); 52864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d, 52964c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc); 53064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 53164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsymm): 53264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc); 53364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A, 53464c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (double*)B, ldb, call->beta.d, (double*)C, ldc); 53564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 53664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsyrk): 53764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc); 53864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A, 53964c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, call->beta.d, (double*)C, ldc); 54064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 54164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dsyr2k): 54264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc); 54364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A, 54464c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (double*)B, ldb, call->beta.d, (double*)C, ldc); 54564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 54664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtrmm): 54764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr); 54864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d, 54964c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)A, lda, (double*)B, ldb); 55064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 55164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_dtrsm): 55264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr); 55364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d, 55464c682b65cd04ac83b51251b40dca14423df351aTim Murray (double*)A, lda, (double*)B, ldb); 55564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 55664c682b65cd04ac83b51251b40dca14423df351aTim Murray 55764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cgemm): 55864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 55964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c, 56064c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, B, ldb, (void*)&call->beta.c, C, ldc); 56164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 56264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_csymm): 56364c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 56464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, 56564c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, B, ldb, (void*)&call->beta.c, C, ldc); 56664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 56764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_csyrk): 56864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 56964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, 57064c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)&call->beta.c, C, ldc); 57164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 57264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_csyr2k): 57364c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 57464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, 57564c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, B, ldb, (void*)&call->beta.c, C, ldc); 57664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 57764c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctrmm): 57864c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 57964c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c, 58064c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, B, ldb); 58164c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 58264c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ctrsm): 58364c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 58464c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c, 58564c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, B, ldb); 58664c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 58764c682b65cd04ac83b51251b40dca14423df351aTim Murray 58864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zgemm): 58964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 59064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z, 59164c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, B, ldb, (void*)&call->beta.z, C, ldc); 59264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 59364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zsymm): 59464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 59564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, 59664c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, B, ldb, (void*)&call->beta.z, C, ldc); 59764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 59864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zsyrk): 59964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 60064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, 60164c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, (void*)&call->beta.z, C, ldc); 60264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 60364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zsyr2k): 60464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 60564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, 60664c682b65cd04ac83b51251b40dca14423df351aTim Murray lda, B, ldb, (void*)&call->beta.z, C, ldc); 60764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 60864c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztrmm): 60964c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 61064c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z, 61164c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, B, ldb); 61264c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 61364c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_ztrsm): 61464c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 61564c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z, 61664c682b65cd04ac83b51251b40dca14423df351aTim Murray A, lda, B, ldb); 61764c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 61864c682b65cd04ac83b51251b40dca14423df351aTim Murray 61964c682b65cd04ac83b51251b40dca14423df351aTim Murray // Level 3 C and Z only 62064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_chemm): 62164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 62264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda, 62364c682b65cd04ac83b51251b40dca14423df351aTim Murray B, ldb, (void*)&call->beta.c, C, ldc); 62464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 62564c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cherk): 62664c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 62764c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda, 62864c682b65cd04ac83b51251b40dca14423df351aTim Murray call->beta.f, C, ldc); 62964c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 63064c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_cher2k): 63164c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 63264c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda, 63364c682b65cd04ac83b51251b40dca14423df351aTim Murray B, ldb, call->beta.f, C, ldc); 63464c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 63564c682b65cd04ac83b51251b40dca14423df351aTim Murray 63664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zhemm): 63764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 63864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda, 63964c682b65cd04ac83b51251b40dca14423df351aTim Murray B, ldb, (void*)&call->beta.z, C, ldc); 64064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 64164c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zherk): 64264c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 64364c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda, 64464c682b65cd04ac83b51251b40dca14423df351aTim Murray call->beta.d, C, ldc); 64564c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 64664c682b65cd04ac83b51251b40dca14423df351aTim Murray case (RsBlas_zher2k): 64764c682b65cd04ac83b51251b40dca14423df351aTim Murray initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 64864c682b65cd04ac83b51251b40dca14423df351aTim Murray cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda, 64964c682b65cd04ac83b51251b40dca14423df351aTim Murray B, ldb, call->beta.d, C, ldc); 65064c682b65cd04ac83b51251b40dca14423df351aTim Murray break; 65164c682b65cd04ac83b51251b40dca14423df351aTim Murray 652aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 6532b999883f2f390ee43ed18317d77c810a0c6657bTim Murray case (RsBlas_bnnm): 654aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray initABC(ain, sizeof(uint8_t), &A, &B, &C, &lda, &ldb, &ldc); 6552b999883f2f390ee43ed18317d77c810a0c6657bTim Murray kernelBNNM(call->M, call->N, call->K, 656aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray (const uint8_t*)A, call->a_offset, lda, 657aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray (const uint8_t*)B, call->b_offset, ldb, 658aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray (uint8_t*)C, call->c_offset, ldc, 659aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray call->c_mult_int); 660aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 661aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray break; 662aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 66364c682b65cd04ac83b51251b40dca14423df351aTim Murray default: 66464c682b65cd04ac83b51251b40dca14423df351aTim Murray ALOGE("unimplemented\n"); 66564c682b65cd04ac83b51251b40dca14423df351aTim Murray } 66664c682b65cd04ac83b51251b40dca14423df351aTim Murray 66764c682b65cd04ac83b51251b40dca14423df351aTim Murray 66864c682b65cd04ac83b51251b40dca14423df351aTim Murray} 66964c682b65cd04ac83b51251b40dca14423df351aTim Murray 6702b999883f2f390ee43ed18317d77c810a0c6657bTim Murrayvoid RsdCpuScriptIntrinsicBLAS::kernelBNNM(size_t m, size_t n, size_t k, 67106deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang const uint8_t* a, uint8_t a_offset, size_t lda, 67206deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang const uint8_t* b, uint8_t b_offset, size_t ldb, 67306deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang uint8_t* c, int32_t c_offset, size_t ldc, 67406deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang int32_t c_mult_int) { 6752b999883f2f390ee43ed18317d77c810a0c6657bTim Murray const int c_shift = 21; 676223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang#if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS) 677223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang // Non-optimized path for ARMv7 devices without SIMD instructions. 678223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang if (!gArchUseSIMD) { 6799195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang /* 6809195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang * Calculations are done in 1.10.21 fixed-point format for the final output, 6819195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang * just before there's a shift down to drop the fractional parts. The output 6829195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 6839195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang * gives some headroom to avoid wrapping around on small overflows. 6849195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang */ 685223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang size_t i = 0, j = 0, l = 0; 686223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang for (j = 0; j < n; j++) { 687223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang for (i = 0; i < m; i++) { 688223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang int32_t total = 0; 689223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang for (l = 0; l < k; l++) { 690223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const int a_index = ((i * lda) + l); 691223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const uint8_t a_as_byte = a[a_index]; 692223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const int32_t a_as_int = (((int32_t)(a_as_byte)) - a_offset); 693223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const int b_index = ((j * ldb) + l); 694223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const uint8_t b_as_byte = b[b_index]; 695223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const int32_t b_as_int = (((int32_t)(b_as_byte)) - b_offset); 696223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const int32_t mult_as_int = (a_as_int * b_as_int); 697223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang total += mult_as_int; 698223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang } 699223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang const int c_index = ((ldc * i) + j); 700223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang int32_t output = 701223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang ((((total + c_offset) * c_mult_int) + (1 << (c_shift - 1))) 702223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang >> c_shift); 703223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang if (output > 255) { 704223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang output = 255; 705223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang } 706223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang if (output < 0) { 707223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang output = 0; 708223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang } 709223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang c[c_index] = (uint8_t)(output); 710223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang } 711223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang } 712223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang return; 713223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang } 714223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang#endif 715223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang 71699d0e8130f5b4bb83d1a68d96496fa558e35193aMiao Wang // Using gemmlowp to calculate the low precision 8 bit GEMM. 717e4f999b761180a227864d97b172a42ca1d8c0df3Miao Wang bool transpose_a = true; 718e4f999b761180a227864d97b172a42ca1d8c0df3Miao Wang bool transpose_b = false; 719223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang bool transpose_c = true; 720223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang gemmlowp::eight_bit_int_gemm::EightBitIntGemm(transpose_a, transpose_b, transpose_c, 721223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang m, n, k, a, -a_offset, lda, 72299d0e8130f5b4bb83d1a68d96496fa558e35193aMiao Wang b, -b_offset, ldb, c, c_offset, 7239195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang c_mult_int, c_shift, ldc, 7249195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang gemmlowp::eight_bit_int_gemm::BitDepthSetting::A8B8); 725223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang 726aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray} 727aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 728aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 729aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 730aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray 73164c682b65cd04ac83b51251b40dca14423df351aTim Murray 73264c682b65cd04ac83b51251b40dca14423df351aTim MurrayRsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, 73364c682b65cd04ac83b51251b40dca14423df351aTim Murray const Script *s) 73464c682b65cd04ac83b51251b40dca14423df351aTim Murray : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS) { 73564c682b65cd04ac83b51251b40dca14423df351aTim Murray 73664c682b65cd04ac83b51251b40dca14423df351aTim Murray 73764c682b65cd04ac83b51251b40dca14423df351aTim Murray} 73864c682b65cd04ac83b51251b40dca14423df351aTim Murray 73964c682b65cd04ac83b51251b40dca14423df351aTim MurrayRsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() { 74064c682b65cd04ac83b51251b40dca14423df351aTim Murray} 74164c682b65cd04ac83b51251b40dca14423df351aTim Murray 74264c682b65cd04ac83b51251b40dca14423df351aTim Murray 74364c682b65cd04ac83b51251b40dca14423df351aTim Murray 74464c682b65cd04ac83b51251b40dca14423df351aTim Murray 74564c682b65cd04ac83b51251b40dca14423df351aTim Murray 74664c682b65cd04ac83b51251b40dca14423df351aTim MurrayRsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx, 74764c682b65cd04ac83b51251b40dca14423df351aTim Murray const Script *s, const Element *e) { 74864c682b65cd04ac83b51251b40dca14423df351aTim Murray 74964c682b65cd04ac83b51251b40dca14423df351aTim Murray return new RsdCpuScriptIntrinsicBLAS(ctx, s); 75064c682b65cd04ac83b51251b40dca14423df351aTim Murray} 751