-
Notifications
You must be signed in to change notification settings - Fork 684
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add onnxifi quantization support #2617
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,54 +40,65 @@ inline llvm::Error loadWeight(const onnxTensorDescriptorV1 &in, Tensor *T) { | |
RETURN_ERR("Only support CPU memory tensors."); | ||
} | ||
|
||
// This is a caffe2 offset shift. | ||
const int32_t OFFSETSHIFT = 128; | ||
std::vector<size_t> dims; | ||
for (unsigned i = 0; i < in.dimensions; ++i) { | ||
dims.push_back(in.shape[i]); | ||
} | ||
|
||
if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) { | ||
T->reset(ElemKind::FloatTy, dims); | ||
|
||
auto TH = T->getHandle<>(); | ||
float *data = (float *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
TH.raw(i) = data[i]; | ||
} | ||
} else if (in.dataType == ONNXIFI_DATATYPE_UINT64 || | ||
in.dataType == ONNXIFI_DATATYPE_INT64) { | ||
const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64; | ||
(void)inDataSigned; | ||
T->reset(ElemKind::Int64ITy, dims); | ||
|
||
auto TH = T->getHandle<int64_t>(); | ||
int64_t *data = (int64_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
RETURN_ERR_IF_NOT( | ||
(inDataSigned || data[i] >= 0), | ||
"Disallow overflow of loaded UINT64 data into Int64ITy."); | ||
TH.raw(i) = data[i]; | ||
} | ||
} else if (in.dataType == ONNXIFI_DATATYPE_INT32) { | ||
T->reset(ElemKind::Int32ITy, dims); | ||
|
||
auto TH = T->getHandle<int32_t>(); | ||
int32_t *data = (int32_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
TH.raw(i) = data[i]; | ||
} | ||
} else if (in.dataType == ONNXIFI_DATATYPE_UINT8) { | ||
T->reset(ElemKind::Int8QTy, dims, 1.0, 0); | ||
|
||
auto TH = T->getHandle<int8_t>(); | ||
uint8_t *data = (uint8_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
constexpr uint8_t OFFSETSHIFT = 128; | ||
TH.raw(i) = static_cast<int8_t>((((uint8_t)data[i]) - OFFSETSHIFT)); | ||
if (in.is_quantized == 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this not a boolean? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @rdzhabarov Because onnxifi is a C API, the field is a uint8 instead of a bool (actually, it was supposed to be a char at first...) |
||
if (in.dataType == ONNXIFI_DATATYPE_UINT8) { | ||
T->reset(ElemKind::Int8QTy, dims, in.scale, in.bias - OFFSETSHIFT); | ||
|
||
auto TH = T->getHandle<int8_t>(); | ||
uint8_t *data = (uint8_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
TH.raw(i) = (int8_t)(data[i] - OFFSETSHIFT); | ||
} | ||
} else if (in.dataType == ONNXIFI_DATATYPE_INT32) { | ||
T->reset(ElemKind::Int32QTy, dims, in.scale, in.bias); | ||
auto TH = T->getHandle<int32_t>(); | ||
int32_t *data = (int32_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
TH.raw(i) = data[i]; | ||
} | ||
} else { | ||
RETURN_ERR("Only uint8 and int32 quantized tensors are supported."); | ||
} | ||
} else { | ||
RETURN_ERR("Only float, index, and int8 tensors are supported."); | ||
} | ||
if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) { | ||
T->reset(ElemKind::FloatTy, dims); | ||
|
||
auto TH = T->getHandle<>(); | ||
float *data = (float *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
TH.raw(i) = data[i]; | ||
} | ||
} else if (in.dataType == ONNXIFI_DATATYPE_UINT64 || | ||
in.dataType == ONNXIFI_DATATYPE_INT64) { | ||
const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64; | ||
T->reset(ElemKind::Int64ITy, dims); | ||
|
||
auto TH = T->getHandle<int64_t>(); | ||
int64_t *data = (int64_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
RETURN_ERR_IF_NOT( | ||
(inDataSigned || data[i] >= 0), | ||
"Disallow overflow of loaded UINT64 data into Int64ITy."); | ||
TH.raw(i) = data[i]; | ||
} | ||
} else if (in.dataType == ONNXIFI_DATATYPE_INT32) { | ||
T->reset(ElemKind::Int32ITy, dims); | ||
|
||
auto TH = T->getHandle<int32_t>(); | ||
int32_t *data = (int32_t *)in.buffer; | ||
for (size_t i = 0; i < TH.size(); ++i) { | ||
TH.raw(i) = data[i]; | ||
} | ||
} else { | ||
RETURN_ERR("Only float and index tensors are supported."); | ||
} | ||
} | ||
return llvm::Error::success(); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,8 +43,8 @@ using ArgumentDictionaryTy = | |
/// In Glow, the activations are quantized to int8_t. Therefore, for the offset | ||
/// read from a quantized caffe2 model, we need to subtract 128 (i.e. add INT8_MIN) | ||
/// to make the activations become int8_t values. | ||
/// For Glow: -127 <= orig_fp32/scale_1 + offset_1 < 128 | ||
/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 < 255 | ||
/// For Glow: -128 <= orig_fp32/scale_1 + offset_1 <= 127 | ||
/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 <= 255 | ||
/// Therefore, we can make scale_1 == scale_2, and offset_1 = offset_2 - 128 | ||
const int32_t OFFSETSHIFT = 128; | ||
|
||
|
@@ -60,7 +60,6 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) { | |
} | ||
dim.push_back(d); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry, not your change, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wont bother having one more line of credit lol |
||
if (in.data_type() == caffe2::TensorProto::FLOAT) { | ||
T->reset(ElemKind::FloatTy, dim); | ||
return llvm::Error::success(); | ||
|
@@ -77,6 +76,26 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) { | |
RETURN_ERR("Only float and index tensors are supported"); | ||
} | ||
} | ||
|
||
llvm::Error setTensorType(const caffe2::QTensorProto &in, Tensor *T) { | ||
std::vector<size_t> dim; | ||
for (auto d : in.dims()) { | ||
if (d == 0) { | ||
RETURN_ERR("0 dimemsion qtensor is not supported"); | ||
} | ||
dim.push_back(d); | ||
} | ||
|
||
if (in.data_type() == caffe2::TensorProto::UINT8) { | ||
T->reset(ElemKind::Int8QTy, dim, in.scale(), in.bias() - OFFSETSHIFT); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the use case for what's checked in line 72?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @rdzhabarov These are two different branches: in #72 we assume the incoming tensor is a non-quantized tensor, and only use Int8QTy to represent an int8 tensor. Here we know it is a quantized tensor (because the protobuf is a QTensorProto), so we treat it like a real quantized tensor. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The linked PR seems to be unrelated. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, it is not a PR that I wanted to link; I meant the 72nd line in this file... |
||
return llvm::Error::success(); | ||
} else if (in.data_type() == caffe2::TensorProto::INT32) { | ||
T->reset(ElemKind::Int32QTy, dim, in.scale(), in.bias()); | ||
return llvm::Error::success(); | ||
} else { | ||
RETURN_ERR("Only uint8 and int32 qtensors are supported"); | ||
} | ||
} | ||
} // namespace | ||
|
||
/// Translates the protocol buffer node \p op into a random access map. | ||
|
@@ -1048,37 +1067,59 @@ llvm::Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { | |
RETURN_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator.")); | ||
} | ||
|
||
template <class TensorProtoType> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nitpick: add a blank line here before |
||
llvm::Error | ||
Caffe2ModelLoader::loadInputsWithTensorProtoType(const caffe2::NetDef &net, | ||
bool loadInputsAsPlaceholders, | ||
const TensorProtoType &in) { | ||
// Skip static weights | ||
if (tensors_.count(in.name())) { | ||
return llvm::Error::success(); | ||
} | ||
|
||
if (loadInputsAsPlaceholders) { | ||
Tensor T; | ||
RETURN_IF_ERR(setTensorType(in, &T)); | ||
|
||
Placeholder *placeholder; | ||
ASSIGN_VALUE_OR_RETURN_ERR( | ||
placeholder, createAndRegisterPlaceholder(in.name(), &T.getType())); | ||
nameToInputVars_.try_emplace(in.name(), placeholder); | ||
} else { | ||
std::unique_ptr<Tensor> T(new Tensor()); | ||
RETURN_IF_ERR(setTensorType(in, T.get())); | ||
tensors_[in.name()] = std::move(T); | ||
} | ||
return llvm::Error::success(); | ||
} | ||
|
||
llvm::Error Caffe2ModelLoader::loadInputs(const caffe2::NetDef &net, | ||
bool loadInputsAsPlaceholders) { | ||
const caffe2::Argument *arg = nullptr; | ||
for (auto i = 0, e = net.arg_size(); i < e; ++i) { | ||
const caffe2::Argument *arg = nullptr, *qarg = nullptr; | ||
for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) { | ||
if (net.arg(i).name() == "input_shape_info") { | ||
arg = &net.arg(i); | ||
break; | ||
} else if (net.arg(i).name() == "input_qshape_info") { | ||
qarg = &net.arg(i); | ||
} | ||
} | ||
|
||
// Load all regular tensor input | ||
if (arg) { | ||
for (const auto &in : arg->tensors()) { | ||
// Skip static weights | ||
if (tensors_.count(in.name())) { | ||
continue; | ||
} | ||
|
||
if (loadInputsAsPlaceholders) { | ||
Tensor T; | ||
RETURN_IF_ERR(setTensorType(in, &T)); | ||
RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>( | ||
net, loadInputsAsPlaceholders, in)); | ||
} | ||
} | ||
|
||
Placeholder *placeholder; | ||
ASSIGN_VALUE_OR_RETURN_ERR( | ||
placeholder, createAndRegisterPlaceholder(in.name(), &T.getType())); | ||
nameToInputVars_.try_emplace(in.name(), placeholder); | ||
} else { | ||
std::unique_ptr<Tensor> T(new Tensor()); | ||
RETURN_IF_ERR(setTensorType(in, T.get())); | ||
tensors_[in.name()] = std::move(T); | ||
} | ||
// Load all quantized tensor input | ||
if (qarg) { | ||
for (const auto &in : qarg->qtensors()) { | ||
RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>( | ||
net, loadInputsAsPlaceholders, in)); | ||
} | ||
} | ||
|
||
return llvm::Error::success(); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to have this in one place; C2ModelLoader.cpp has the same.