[go: nahoru, domu]

/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1664c682b65cd04ac83b51251b40dca14423df351aTim Murray
1764c682b65cd04ac83b51251b40dca14423df351aTim Murray
1864c682b65cd04ac83b51251b40dca14423df351aTim Murray#include "rsCpuIntrinsic.h"
1964c682b65cd04ac83b51251b40dca14423df351aTim Murray#include "rsCpuIntrinsicInlines.h"
20e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#include "rsCpuBLASDispatch.h"
2199d0e8130f5b4bb83d1a68d96496fa558e35193aMiao Wang#include "eight_bit_int_gemm.h"
2264c682b65cd04ac83b51251b40dca14423df351aTim Murray
2364c682b65cd04ac83b51251b40dca14423df351aTim Murrayusing namespace android;
2464c682b65cd04ac83b51251b40dca14423df351aTim Murrayusing namespace android::renderscript;
2564c682b65cd04ac83b51251b40dca14423df351aTim Murray
2664c682b65cd04ac83b51251b40dca14423df351aTim Murraynamespace android {
2764c682b65cd04ac83b51251b40dca14423df351aTim Murraynamespace renderscript {
2864c682b65cd04ac83b51251b40dca14423df351aTim Murray
2964c682b65cd04ac83b51251b40dca14423df351aTim Murray
3064c682b65cd04ac83b51251b40dca14423df351aTim Murrayclass RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic {
3164c682b65cd04ac83b51251b40dca14423df351aTim Murraypublic:
32c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void invokeForEach(uint32_t slot,
33c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                       const Allocation ** ain,
34c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                       uint32_t inLen,
35c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                       Allocation * aout,
36c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                       const void * usr,
37c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                       uint32_t usrLen,
38c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                       const RsScriptCall *sc) override;
3964c682b65cd04ac83b51251b40dca14423df351aTim Murray
40c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void populateScript(Script *) override;
41c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    ~RsdCpuScriptIntrinsicBLAS() override;
4264c682b65cd04ac83b51251b40dca14423df351aTim Murray    RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s);
4364c682b65cd04ac83b51251b40dca14423df351aTim Murray
4464c682b65cd04ac83b51251b40dca14423df351aTim Murrayprotected:
4564c682b65cd04ac83b51251b40dca14423df351aTim Murray
46aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray    uint8_t a_offset = 0;
47aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray    uint8_t b_offset = 0;
48aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray    uint8_t c_offset = 0;
49aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
50e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#ifdef RS_COMPATIBILITY_LIB
51e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang    bool isBlasLibInitialized = false;
52e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#endif
532b999883f2f390ee43ed18317d77c810a0c6657bTim Murray    static void kernelBNNM(size_t m, size_t n, size_t k,
5406deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                           const uint8_t* a, uint8_t a_offset, size_t lda,
5506deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                           const uint8_t* b, uint8_t b_offset, size_t ldb,
5606deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                           uint8_t* c, int32_t c_offset, size_t ldc,
5706deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                           int32_t c_mult_int);
58aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
59aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
6064c682b65cd04ac83b51251b40dca14423df351aTim Murray
6164c682b65cd04ac83b51251b40dca14423df351aTim Murray};
6264c682b65cd04ac83b51251b40dca14423df351aTim Murray
6364c682b65cd04ac83b51251b40dca14423df351aTim Murray}
6464c682b65cd04ac83b51251b40dca14423df351aTim Murray}
6564c682b65cd04ac83b51251b40dca14423df351aTim Murray
6664c682b65cd04ac83b51251b40dca14423df351aTim Murrayvoid RsdCpuScriptIntrinsicBLAS::populateScript(Script *s) {
6764c682b65cd04ac83b51251b40dca14423df351aTim Murray    s->mHal.info.exportedVariableCount = 0;
6864c682b65cd04ac83b51251b40dca14423df351aTim Murray}
6964c682b65cd04ac83b51251b40dca14423df351aTim Murray
7064c682b65cd04ac83b51251b40dca14423df351aTim Murraystatic void initABC(const Allocation ** ain,
7164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    size_t size,
7264c682b65cd04ac83b51251b40dca14423df351aTim Murray                    void** A,
7364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    void** B,
7464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    void** C,
7564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    int* lda,
7664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    int* ldb,
7764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    int* ldc)
7864c682b65cd04ac83b51251b40dca14423df351aTim Murray{
7964c682b65cd04ac83b51251b40dca14423df351aTim Murray    if (ain[0]) {
8064c682b65cd04ac83b51251b40dca14423df351aTim Murray        *A = ain[0]->mHal.drvState.lod[0].mallocPtr;
8164c682b65cd04ac83b51251b40dca14423df351aTim Murray        *lda = (int)(ain[0]->mHal.drvState.lod[0].stride/size);
8264c682b65cd04ac83b51251b40dca14423df351aTim Murray    }
8364c682b65cd04ac83b51251b40dca14423df351aTim Murray    if (ain[1]) {
8464c682b65cd04ac83b51251b40dca14423df351aTim Murray        *B = ain[1]->mHal.drvState.lod[0].mallocPtr;
8564c682b65cd04ac83b51251b40dca14423df351aTim Murray        *ldb = (int)(ain[1]->mHal.drvState.lod[0].stride/size);
8664c682b65cd04ac83b51251b40dca14423df351aTim Murray    }
8764c682b65cd04ac83b51251b40dca14423df351aTim Murray    if (ain[2]) {
8864c682b65cd04ac83b51251b40dca14423df351aTim Murray        *C = ain[2]->mHal.drvState.lod[0].mallocPtr;
8964c682b65cd04ac83b51251b40dca14423df351aTim Murray        *ldc = (int)(ain[2]->mHal.drvState.lod[0].stride/size);
9064c682b65cd04ac83b51251b40dca14423df351aTim Murray    }
9164c682b65cd04ac83b51251b40dca14423df351aTim Murray
9264c682b65cd04ac83b51251b40dca14423df351aTim Murray
9364c682b65cd04ac83b51251b40dca14423df351aTim Murray}
9464c682b65cd04ac83b51251b40dca14423df351aTim Murray
9564c682b65cd04ac83b51251b40dca14423df351aTim Murrayvoid RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot,
9664c682b65cd04ac83b51251b40dca14423df351aTim Murray                                              const Allocation ** ain,
9764c682b65cd04ac83b51251b40dca14423df351aTim Murray                                              uint32_t inLen,
9864c682b65cd04ac83b51251b40dca14423df351aTim Murray                                              Allocation * aout,
9964c682b65cd04ac83b51251b40dca14423df351aTim Murray                                              const void * usr,
10064c682b65cd04ac83b51251b40dca14423df351aTim Murray                                              uint32_t usrLen,
10164c682b65cd04ac83b51251b40dca14423df351aTim Murray                                              const RsScriptCall *sc) {
10264c682b65cd04ac83b51251b40dca14423df351aTim Murray    RsBlasCall* call = (RsBlasCall*) usr;
10364c682b65cd04ac83b51251b40dca14423df351aTim Murray    // setup BLAS enum args
10464c682b65cd04ac83b51251b40dca14423df351aTim Murray    enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
10564c682b65cd04ac83b51251b40dca14423df351aTim Murray    enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
10664c682b65cd04ac83b51251b40dca14423df351aTim Murray    enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo;
10764c682b65cd04ac83b51251b40dca14423df351aTim Murray    enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag;
10864c682b65cd04ac83b51251b40dca14423df351aTim Murray    enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side;
10964c682b65cd04ac83b51251b40dca14423df351aTim Murray
11064c682b65cd04ac83b51251b40dca14423df351aTim Murray    void *A = nullptr;
11164c682b65cd04ac83b51251b40dca14423df351aTim Murray    void *B = nullptr;
11264c682b65cd04ac83b51251b40dca14423df351aTim Murray    void *C = nullptr;
11364c682b65cd04ac83b51251b40dca14423df351aTim Murray    void *X = nullptr;
11464c682b65cd04ac83b51251b40dca14423df351aTim Murray    void *Y = nullptr;
11564c682b65cd04ac83b51251b40dca14423df351aTim Murray
11664c682b65cd04ac83b51251b40dca14423df351aTim Murray    int lda = 0, ldb = 0, ldc = 0;
11764c682b65cd04ac83b51251b40dca14423df351aTim Murray
118e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#ifdef RS_COMPATIBILITY_LIB
119e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang    // Allow BNNM even without libblas
120e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang    if (call->func != RsBlas_bnnm && !isBlasLibInitialized) {
121e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang        if (!loadBLASLib()) {
122e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang            ALOGE("Failed to load the BLAS lib, IntrinsicBLAS NOT supported!\n");
123e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang            return;
124e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang        }
125e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang        isBlasLibInitialized = true;
126e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang    }
127e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang#endif
128e941f18202b9c9883ff81c63710f7faec5c988e4Miao Wang
12964c682b65cd04ac83b51251b40dca14423df351aTim Murray    switch (call->func) {
13064c682b65cd04ac83b51251b40dca14423df351aTim Murray
13164c682b65cd04ac83b51251b40dca14423df351aTim Murray    // Level 1 BLAS: returns into a 1D Allocation
13264c682b65cd04ac83b51251b40dca14423df351aTim Murray
13364c682b65cd04ac83b51251b40dca14423df351aTim Murray
13464c682b65cd04ac83b51251b40dca14423df351aTim Murray    // Level 2 BLAS
13564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sgemv):
136b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
13764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A,
13864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
13964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
14064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sgbmv):
141b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
14264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
14364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    call->alpha.f, (float*)A, lda, (float*)X, call->incX,
14464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    call->beta.f, (float*)Y, call->incY);
14564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
14664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_strmv):
14764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
14864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
14964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (float*)X, call->incX);
15064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
15164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_stbmv):
15264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
15364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
15464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (float*)X, call->incX);
15564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
15664c682b65cd04ac83b51251b40dca14423df351aTim Murray    // stpmv takes a packed 1D Allocation only
15764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_stpmv):
15864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
15964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
16064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)X, call->incX);
16164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
16264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_strsv):
16364c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
16464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda,
16564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)X, call->incX);
16664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
16764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_stbsv):
16864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
16964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
17064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (float*)X, call->incX);
17164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
17264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_stpsv):
17364c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
17464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
17564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)X, call->incX);
17664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
17764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dgemv):
178b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
17964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A,
18064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
18164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
18264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dgbmv):
183b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
18464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
18564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    call->alpha.d, (double*)A, lda, (double*)X, call->incX,
18664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    call->beta.d, (double*)Y, call->incY);
18764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
18864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtrmv):
18964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
19064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
19164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (double*)X, call->incX);
19264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
19364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtbmv):
19464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
19564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
19664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (double*)X, call->incX);
19764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
19864c682b65cd04ac83b51251b40dca14423df351aTim Murray    // stpmv takes a packed 1D Allocation only
19964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtpmv):
20064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
20164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
20264c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)X, call->incX);
20364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
20464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtrsv):
20564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
20664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda,
20764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)X, call->incX);
20864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
20964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtbsv):
21064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
21164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
21264c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (double*)X, call->incX);
21364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
21464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtpsv):
21564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
21664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
21764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)X, call->incX);
21864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
21964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cgemv):
220b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
22164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A,
22264c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY);
22364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
22464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cgbmv):
225b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
22664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
22764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX,
22864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)&call->beta.c, (void*)Y, call->incY);
22964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
23064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctrmv):
23164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
23264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
23364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX);
23464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
23564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctbmv):
23664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
23764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
23864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX);
23964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
24064c682b65cd04ac83b51251b40dca14423df351aTim Murray    // stpmv takes a packed 1D Allocation only
24164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctpmv):
24264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
24364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
24464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)X, call->incX);
24564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
24664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctrsv):
24764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
24864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
24964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)X, call->incX);
25064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
25164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctbsv):
25264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
25364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
25464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX);
25564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
25664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctpsv):
25764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
25864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
25964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)X, call->incX);
26064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
26164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zgemv):
262b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
26364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A,
26464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY);
26564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
26664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zgbmv):
267b75ba0fc7469d0bb4c1a6679664a846b3741792eMiao Wang        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
26864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
26964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX,
27064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)&call->beta.z, (void*)Y, call->incY);
27164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
27264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztrmv):
27364c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
27464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
27564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX);
27664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
27764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztbmv):
27864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
27964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
28064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX);
28164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
28264c682b65cd04ac83b51251b40dca14423df351aTim Murray    // stpmv takes a packed 1D Allocation only
28364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztpmv):
28464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
28564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
28664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)X, call->incX);
28764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
28864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztrsv):
28964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
29064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
29164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)X, call->incX);
29264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
29364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztbsv):
29464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
29564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
29664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)X, call->incX);
29764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
29864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztpsv):
29964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
30064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
30164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (void*)X, call->incX);
30264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
30364c682b65cd04ac83b51251b40dca14423df351aTim Murray
30464c682b65cd04ac83b51251b40dca14423df351aTim Murray
30564c682b65cd04ac83b51251b40dca14423df351aTim Murray    // S and D only
30664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssymv):
30764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
30864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda,
30964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
31064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
31164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssbmv):
31264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
31364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f,
31464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)A, lda, (float*)X, call->incX, call->beta.f,
31564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)Y, call->incY);
31664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
31764c682b65cd04ac83b51251b40dca14423df351aTim Murray    //sspmv requires a packed 1D Allocation
31864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sspmv):
31964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
32064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A,
32164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
32264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
32364c682b65cd04ac83b51251b40dca14423df351aTim Murray    // following calls have init reordered because A is output matrix
32464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sger):
32564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
32664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X,
32764c682b65cd04ac83b51251b40dca14423df351aTim Murray                   call->incX, (float*)Y, call->incY, (float*)A, lda);
32864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
32964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssyr):
33064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
33164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
33264c682b65cd04ac83b51251b40dca14423df351aTim Murray                   (float*)A, lda);
33364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
33464c682b65cd04ac83b51251b40dca14423df351aTim Murray    // sspr is packed 1D Allocation A only
33564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sspr):
33664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
33764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
33864c682b65cd04ac83b51251b40dca14423df351aTim Murray                   (float*)A);
33964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
34064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssyr2):
34164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
34264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
34364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)Y, call->incY, (float*)A, lda);
34464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
34564c682b65cd04ac83b51251b40dca14423df351aTim Murray    // sspr2 is packed 1D Allocation A only
34664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sspr2):
34764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
34864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
34964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)Y, call->incY, (float*)A);
35064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
35164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsymv):
35264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
35364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda,
35464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
35564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
35664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsbmv):
35764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
35864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d,
35964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)A, lda, (double*)X, call->incX, call->beta.d,
36064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)Y, call->incY);
36164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
36264c682b65cd04ac83b51251b40dca14423df351aTim Murray    // dspmv requires a packed 1D Allocation
36364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dspmv):
36464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
36564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A,
36664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
36764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
36864c682b65cd04ac83b51251b40dca14423df351aTim Murray    // following calls have init reordered because A is output matrix
36964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dger):
37064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
37164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X,
37264c682b65cd04ac83b51251b40dca14423df351aTim Murray                   call->incX, (double*)Y, call->incY, (double*)A, lda);
37364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
37464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsyr):
37564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
37664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
37764c682b65cd04ac83b51251b40dca14423df351aTim Murray                   (double*)A, lda);
37864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
37964c682b65cd04ac83b51251b40dca14423df351aTim Murray    // dspr is packed 1D Allocation A only
38064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dspr):
38164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
38264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
38364c682b65cd04ac83b51251b40dca14423df351aTim Murray                   (double*)A);
38464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
38564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsyr2):
38664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
38764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
38864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)Y, call->incY, (double*)A, lda);
38964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
39064c682b65cd04ac83b51251b40dca14423df351aTim Murray    // dspr2 is packed 1D Allocation A only
39164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dspr2):
39264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
39364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
39464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)Y, call->incY, (double*)A);
39564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
39664c682b65cd04ac83b51251b40dca14423df351aTim Murray
39764c682b65cd04ac83b51251b40dca14423df351aTim Murray    // C and Z only
39864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_chemv):
39964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
40064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda,
40164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, (void*)&call->beta.c, Y, call->incY);
40264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
40364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_chbmv):
40464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
40564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c,
40664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY);
40764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
40864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_chpmv):
40964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
41064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A,
41164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, (void*)&call->beta.c, Y, call->incY);
41264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
41364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cgeru):
41464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
41564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
41664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, Y, call->incY, A, lda);
41764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
41864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cgerc):
41964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
42064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
42164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, Y, call->incY, A, lda);
42264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
42364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cher):
42408ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang        initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
42564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f,
42664c682b65cd04ac83b51251b40dca14423df351aTim Murray                   X, call->incX, A, lda);
42764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
42864c682b65cd04ac83b51251b40dca14423df351aTim Murray    // packed 1D Allocations only
42964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_chpr):
43008ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang        initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
43164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X,
43264c682b65cd04ac83b51251b40dca14423df351aTim Murray                   call->incX, A);
43364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
43464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cher2):
43564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
43664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c,
43764c682b65cd04ac83b51251b40dca14423df351aTim Murray                   X, call->incX, Y, call->incY, A, lda);
43864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
43964c682b65cd04ac83b51251b40dca14423df351aTim Murray    // packed 1D Allocations only
44064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_chpr2):
44164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
44264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X,
44364c682b65cd04ac83b51251b40dca14423df351aTim Murray                   call->incX, Y, call->incY, A);
44464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
44564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zhemv):
44664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
44764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda,
44864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, (void*)&call->beta.z, Y, call->incY);
44964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
45064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zhbmv):
45164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
45264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z,
45364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY);
45464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
45564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zhpmv):
45664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
45764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A,
45864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, (void*)&call->beta.z, Y, call->incY);
45964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
46064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zgeru):
46164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
46264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
46364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, Y, call->incY, A, lda);
46464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
46564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zgerc):
46664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
46764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
46864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    X, call->incX, Y, call->incY, A, lda);
46964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
47064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zher):
47108ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang        initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
47264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d,
47364c682b65cd04ac83b51251b40dca14423df351aTim Murray                   X, call->incX, A, lda);
47464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
47564c682b65cd04ac83b51251b40dca14423df351aTim Murray    // packed 1D Allocations only
47664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zhpr):
47708ef7b7f7977e9c991d8ba94a63860edcb88a3d9Miao Wang        initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
47864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X,
47964c682b65cd04ac83b51251b40dca14423df351aTim Murray                   call->incX, A);
48064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
48164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zher2):
48264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
48364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z,
48464c682b65cd04ac83b51251b40dca14423df351aTim Murray                   X, call->incX, Y, call->incY, A, lda);
48564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
48664c682b65cd04ac83b51251b40dca14423df351aTim Murray    // packed 1D Allocations only
48764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zhpr2):
48864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
48964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X,
49064c682b65cd04ac83b51251b40dca14423df351aTim Murray                   call->incX, Y, call->incY, A);
49164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
49264c682b65cd04ac83b51251b40dca14423df351aTim Murray
49364c682b65cd04ac83b51251b40dca14423df351aTim Murray    // Level 3 BLAS
49464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_sgemm):
49564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
49664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f,
49764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
49864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
49964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssymm):
50064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
50164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A,
50264c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
50364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
50464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssyrk):
50564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc);
50664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
50764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, call->beta.f, (float*)C, ldc);
50864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
50964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ssyr2k):
51064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
51164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
51264c682b65cd04ac83b51251b40dca14423df351aTim Murray                     lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
51364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
51464c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_strmm):
51564c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
51664c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
51764c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)A, lda, (float*)B, ldb);
51864c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
51964c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_strsm):
52064c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
52164c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
52264c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (float*)A, lda, (float*)B, ldb);
52364c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
52464c682b65cd04ac83b51251b40dca14423df351aTim Murray
52564c682b65cd04ac83b51251b40dca14423df351aTim Murray
52664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dgemm):
52764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
52864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d,
52964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
53064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
53164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsymm):
53264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
53364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A,
53464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
53564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
53664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsyrk):
53764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc);
53864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
53964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, call->beta.d, (double*)C, ldc);
54064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
54164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dsyr2k):
54264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
54364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
54464c682b65cd04ac83b51251b40dca14423df351aTim Murray                     lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
54564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
54664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtrmm):
54764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
54864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
54964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)A, lda, (double*)B, ldb);
55064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
55164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_dtrsm):
55264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
55364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
55464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    (double*)A, lda, (double*)B, ldb);
55564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
55664c682b65cd04ac83b51251b40dca14423df351aTim Murray
55764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cgemm):
55864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
55964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c,
56064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, B, ldb, (void*)&call->beta.c, C, ldc);
56164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
56264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_csymm):
56364c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
56464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A,
56564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, B, ldb, (void*)&call->beta.c, C, ldc);
56664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
56764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_csyrk):
56864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
56964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
57064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)&call->beta.c, C, ldc);
57164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
57264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_csyr2k):
57364c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
57464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
57564c682b65cd04ac83b51251b40dca14423df351aTim Murray                     lda, B, ldb, (void*)&call->beta.c, C, ldc);
57664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
57764c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctrmm):
57864c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
57964c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
58064c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, B, ldb);
58164c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
58264c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ctrsm):
58364c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
58464c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
58564c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, B, ldb);
58664c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
58764c682b65cd04ac83b51251b40dca14423df351aTim Murray
58864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zgemm):
58964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
59064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z,
59164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, B, ldb, (void*)&call->beta.z, C, ldc);
59264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
59364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zsymm):
59464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
59564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A,
59664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, B, ldb, (void*)&call->beta.z, C, ldc);
59764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
59864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zsyrk):
59964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
60064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
60164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    lda, (void*)&call->beta.z, C, ldc);
60264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
60364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zsyr2k):
60464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
60564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
60664c682b65cd04ac83b51251b40dca14423df351aTim Murray                     lda, B, ldb, (void*)&call->beta.z, C, ldc);
60764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
60864c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztrmm):
60964c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
61064c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
61164c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, B, ldb);
61264c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
61364c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_ztrsm):
61464c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
61564c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
61664c682b65cd04ac83b51251b40dca14423df351aTim Murray                    A, lda, B, ldb);
61764c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
61864c682b65cd04ac83b51251b40dca14423df351aTim Murray
61964c682b65cd04ac83b51251b40dca14423df351aTim Murray    // Level 3 C and Z only
62064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_chemm):
62164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
62264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda,
62364c682b65cd04ac83b51251b40dca14423df351aTim Murray                    B, ldb, (void*)&call->beta.c, C, ldc);
62464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
62564c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cherk):
62664c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
62764c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda,
62864c682b65cd04ac83b51251b40dca14423df351aTim Murray                    call->beta.f, C, ldc);
62964c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
63064c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_cher2k):
63164c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
63264c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda,
63364c682b65cd04ac83b51251b40dca14423df351aTim Murray                     B, ldb, call->beta.f, C, ldc);
63464c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
63564c682b65cd04ac83b51251b40dca14423df351aTim Murray
63664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zhemm):
63764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
63864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda,
63964c682b65cd04ac83b51251b40dca14423df351aTim Murray                    B, ldb, (void*)&call->beta.z, C, ldc);
64064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
64164c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zherk):
64264c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
64364c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda,
64464c682b65cd04ac83b51251b40dca14423df351aTim Murray                    call->beta.d, C, ldc);
64564c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
64664c682b65cd04ac83b51251b40dca14423df351aTim Murray    case (RsBlas_zher2k):
64764c682b65cd04ac83b51251b40dca14423df351aTim Murray        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
64864c682b65cd04ac83b51251b40dca14423df351aTim Murray        cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda,
64964c682b65cd04ac83b51251b40dca14423df351aTim Murray                     B, ldb, call->beta.d, C, ldc);
65064c682b65cd04ac83b51251b40dca14423df351aTim Murray        break;
65164c682b65cd04ac83b51251b40dca14423df351aTim Murray
652aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
6532b999883f2f390ee43ed18317d77c810a0c6657bTim Murray    case (RsBlas_bnnm):
654aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray        initABC(ain, sizeof(uint8_t), &A, &B, &C, &lda, &ldb, &ldc);
6552b999883f2f390ee43ed18317d77c810a0c6657bTim Murray        kernelBNNM(call->M, call->N, call->K,
656aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray                    (const uint8_t*)A, call->a_offset, lda,
657aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray                    (const uint8_t*)B, call->b_offset, ldb,
658aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray                    (uint8_t*)C, call->c_offset, ldc,
659aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray                    call->c_mult_int);
660aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
661aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray        break;
662aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
66364c682b65cd04ac83b51251b40dca14423df351aTim Murray    default:
66464c682b65cd04ac83b51251b40dca14423df351aTim Murray        ALOGE("unimplemented\n");
66564c682b65cd04ac83b51251b40dca14423df351aTim Murray    }
66664c682b65cd04ac83b51251b40dca14423df351aTim Murray
66764c682b65cd04ac83b51251b40dca14423df351aTim Murray
66864c682b65cd04ac83b51251b40dca14423df351aTim Murray}
66964c682b65cd04ac83b51251b40dca14423df351aTim Murray
6702b999883f2f390ee43ed18317d77c810a0c6657bTim Murrayvoid RsdCpuScriptIntrinsicBLAS::kernelBNNM(size_t m, size_t n, size_t k,
67106deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                                           const uint8_t* a, uint8_t a_offset, size_t lda,
67206deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                                           const uint8_t* b, uint8_t b_offset, size_t ldb,
67306deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                                           uint8_t* c, int32_t c_offset, size_t ldc,
67406deda3751a4a7358a7c7e03fbf1e4325fafb807Miao Wang                                           int32_t c_mult_int) {
6752b999883f2f390ee43ed18317d77c810a0c6657bTim Murray    const int c_shift = 21;
676223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang#if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
677223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang    // Non-optimized path for ARMv7 devices without SIMD instructions.
678223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang    if (!gArchUseSIMD) {
6799195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang        /*
6809195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang         * Calculations are done in 1.10.21 fixed-point format for the final output,
6819195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang         * just before there's a shift down to drop the fractional parts. The output
6829195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang         * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
6839195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang         * gives some headroom to avoid wrapping around on small overflows.
6849195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang         */
685223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang        size_t i = 0, j = 0, l = 0;
686223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang        for (j = 0; j < n; j++) {
687223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang            for (i = 0; i < m; i++) {
688223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                int32_t total = 0;
689223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                for (l = 0; l < k; l++) {
690223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const int a_index = ((i * lda) + l);
691223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const uint8_t a_as_byte = a[a_index];
692223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const int32_t a_as_int = (((int32_t)(a_as_byte)) - a_offset);
693223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const int b_index = ((j * ldb) + l);
694223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const uint8_t b_as_byte = b[b_index];
695223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const int32_t b_as_int = (((int32_t)(b_as_byte)) - b_offset);
696223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    const int32_t mult_as_int = (a_as_int * b_as_int);
697223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    total += mult_as_int;
698223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                }
699223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                const int c_index = ((ldc * i) + j);
700223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                int32_t output =
701223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    ((((total + c_offset) * c_mult_int) + (1 << (c_shift - 1)))
702223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                     >> c_shift);
703223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                if (output > 255) {
704223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    output = 255;
705223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                }
706223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                if (output < 0) {
707223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                    output = 0;
708223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                }
709223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                c[c_index] = (uint8_t)(output);
710223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang            }
711223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang        }
712223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang        return;
713223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang    }
714223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang#endif
715223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang
71699d0e8130f5b4bb83d1a68d96496fa558e35193aMiao Wang    // Using gemmlowp to calculate the low precision 8 bit GEMM.
717e4f999b761180a227864d97b172a42ca1d8c0df3Miao Wang    bool transpose_a = true;
718e4f999b761180a227864d97b172a42ca1d8c0df3Miao Wang    bool transpose_b = false;
719223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang    bool transpose_c = true;
720223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang    gemmlowp::eight_bit_int_gemm::EightBitIntGemm(transpose_a, transpose_b, transpose_c,
721223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang                                                  m, n, k, a, -a_offset, lda,
72299d0e8130f5b4bb83d1a68d96496fa558e35193aMiao Wang                                                  b, -b_offset, ldb, c, c_offset,
7239195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang                                                  c_mult_int, c_shift, ldc,
7249195e5188cb0e72d874512de01e7e58f1f47e0b7Miao Wang                                                  gemmlowp::eight_bit_int_gemm::BitDepthSetting::A8B8);
725223231fe99c9c958de4a1c8723aff88cb667de52Miao Wang
726aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray}
727aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
728aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
729aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
730aff744561bea3c8a7a7d59c0cb8cd9438f6dcd1cTim Murray
73164c682b65cd04ac83b51251b40dca14423df351aTim Murray
73264c682b65cd04ac83b51251b40dca14423df351aTim MurrayRsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx,
73364c682b65cd04ac83b51251b40dca14423df351aTim Murray                                                   const Script *s)
73464c682b65cd04ac83b51251b40dca14423df351aTim Murray            : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS) {
73564c682b65cd04ac83b51251b40dca14423df351aTim Murray
73664c682b65cd04ac83b51251b40dca14423df351aTim Murray
73764c682b65cd04ac83b51251b40dca14423df351aTim Murray}
73864c682b65cd04ac83b51251b40dca14423df351aTim Murray
73964c682b65cd04ac83b51251b40dca14423df351aTim MurrayRsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() {
74064c682b65cd04ac83b51251b40dca14423df351aTim Murray}
74164c682b65cd04ac83b51251b40dca14423df351aTim Murray
74264c682b65cd04ac83b51251b40dca14423df351aTim Murray
74364c682b65cd04ac83b51251b40dca14423df351aTim Murray
74464c682b65cd04ac83b51251b40dca14423df351aTim Murray
74564c682b65cd04ac83b51251b40dca14423df351aTim Murray
74664c682b65cd04ac83b51251b40dca14423df351aTim MurrayRsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
74764c682b65cd04ac83b51251b40dca14423df351aTim Murray                                    const Script *s, const Element *e) {
74864c682b65cd04ac83b51251b40dca14423df351aTim Murray
74964c682b65cd04ac83b51251b40dca14423df351aTim Murray    return new RsdCpuScriptIntrinsicBLAS(ctx, s);
75064c682b65cd04ac83b51251b40dca14423df351aTim Murray}
751