add onnxifi quantization support #2617

Merged · 4 commits · Mar 29, 2019
8 changes: 8 additions & 0 deletions include/glow/Importer/Caffe2ModelLoader.h
@@ -67,6 +67,14 @@ class Caffe2ModelLoader
/// Mapping between Caffe2 tensor names for inputs and actual Glow input vars.
llvm::StringMap<Placeholder *> nameToInputVars_;

/// loadInputs calls this function for each tensor in its target arguments.
/// Currently we support two tensor proto types:
/// caffe2::TensorProto and caffe2::QTensorProto.
template <class TensorProtoType>
llvm::Error loadInputsWithTensorProtoType(const caffe2::NetDef &net,
bool loadInputsAsPlaceholders,
const TensorProtoType &in);

/// Load the inputs from the NetDef. If \p loadInputsAsPlaceholders is
/// true then this will load each graph input as a placeholder otherwise it
/// will create an empty tensor for each input.
93 changes: 52 additions & 41 deletions include/glow/Importer/CommonOperatorLoader.h
@@ -40,54 +40,65 @@ inline llvm::Error loadWeight(const onnxTensorDescriptorV1 &in, Tensor *T) {
RETURN_ERR("Only support CPU memory tensors.");
}

+  // This is a caffe2 offset shift.
+  const int32_t OFFSETSHIFT = 128;
Contributor: would be good to have this in one place. C2ModelLoader.cpp has the same.

std::vector<size_t> dims;
for (unsigned i = 0; i < in.dimensions; ++i) {
dims.push_back(in.shape[i]);
}

-  if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) {
-    T->reset(ElemKind::FloatTy, dims);
-
-    auto TH = T->getHandle<>();
-    float *data = (float *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      TH.raw(i) = data[i];
-    }
-  } else if (in.dataType == ONNXIFI_DATATYPE_UINT64 ||
-             in.dataType == ONNXIFI_DATATYPE_INT64) {
-    const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64;
-    (void)inDataSigned;
-    T->reset(ElemKind::Int64ITy, dims);
-
-    auto TH = T->getHandle<int64_t>();
-    int64_t *data = (int64_t *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      RETURN_ERR_IF_NOT(
-          (inDataSigned || data[i] >= 0),
-          "Disallow overflow of loaded UINT64 data into Int64ITy.");
-      TH.raw(i) = data[i];
-    }
-  } else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
-    T->reset(ElemKind::Int32ITy, dims);
-
-    auto TH = T->getHandle<int32_t>();
-    int32_t *data = (int32_t *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      TH.raw(i) = data[i];
-    }
-  } else if (in.dataType == ONNXIFI_DATATYPE_UINT8) {
-    T->reset(ElemKind::Int8QTy, dims, 1.0, 0);
-
-    auto TH = T->getHandle<int8_t>();
-    uint8_t *data = (uint8_t *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      constexpr uint8_t OFFSETSHIFT = 128;
-      TH.raw(i) = static_cast<int8_t>((((uint8_t)data[i]) - OFFSETSHIFT));
+  if (in.is_quantized == 1) {
Contributor: why is this not boolean?

Contributor Author: @rdzhabarov because onnxifi is a C API, and it is a uint8 instead of a bool (actually it was supposed to be char at first...)

+    if (in.dataType == ONNXIFI_DATATYPE_UINT8) {
+      T->reset(ElemKind::Int8QTy, dims, in.scale, in.bias - OFFSETSHIFT);
+
+      auto TH = T->getHandle<int8_t>();
+      uint8_t *data = (uint8_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = (int8_t)(data[i] - OFFSETSHIFT);
+      }
+    } else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
+      T->reset(ElemKind::Int32QTy, dims, in.scale, in.bias);
+      auto TH = T->getHandle<int32_t>();
+      int32_t *data = (int32_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = data[i];
+      }
+    } else {
+      RETURN_ERR("Only uint8 and int32 quantized tensors are supported.");
+    }
-    }
-  } else {
-    RETURN_ERR("Only float, index, and int8 tensors are supported.");
-  }
+  } else {
+    if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) {
+      T->reset(ElemKind::FloatTy, dims);
+
+      auto TH = T->getHandle<>();
+      float *data = (float *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = data[i];
+      }
+    } else if (in.dataType == ONNXIFI_DATATYPE_UINT64 ||
+               in.dataType == ONNXIFI_DATATYPE_INT64) {
+      const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64;
+      T->reset(ElemKind::Int64ITy, dims);
+
+      auto TH = T->getHandle<int64_t>();
+      int64_t *data = (int64_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        RETURN_ERR_IF_NOT(
+            (inDataSigned || data[i] >= 0),
+            "Disallow overflow of loaded UINT64 data into Int64ITy.");
+        TH.raw(i) = data[i];
+      }
+    } else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
+      T->reset(ElemKind::Int32ITy, dims);
+
+      auto TH = T->getHandle<int32_t>();
+      int32_t *data = (int32_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = data[i];
+      }
+    } else {
+      RETURN_ERR("Only float and index tensors are supported.");
+    }
+  }
return llvm::Error::success();
}
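To make the new quantized path concrete, here is a hedged usage sketch that fills in a descriptor the way `loadWeight` reads it above. The quantization fields (`is_quantized`, `scale`, `bias`) come from the updated foxi header; treat the exact initialization as illustrative rather than canonical.

```cpp
// Illustrative only: a 4-element quantized uint8 weight, loaded via the
// new is_quantized branch. Field names mirror their uses in loadWeight;
// see the updated foxi/onnxifi.h for the authoritative layout.
uint8_t rawData[4] = {0, 100, 128, 255};
uint64_t shape[1] = {4};

onnxTensorDescriptorV1 desc;
desc.dataType = ONNXIFI_DATATYPE_UINT8;
desc.memoryType = ONNXIFI_MEMORY_TYPE_CPU; // loadWeight only accepts CPU memory
desc.dimensions = 1;
desc.shape = shape;
desc.buffer = reinterpret_cast<onnxPointer>(rawData);
desc.is_quantized = 1; // uint8 flag rather than bool: ONNXIFI is a C API
desc.scale = 0.25f;
desc.bias = 128;       // caffe2-style zero point; Glow stores bias - OFFSETSHIFT

Tensor T;
if (llvm::Error err = loadWeight(desc, &T)) {
  // handle or propagate the error
}
```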

87 changes: 64 additions & 23 deletions lib/Importer/Caffe2ModelLoader.cpp
@@ -43,8 +43,8 @@ using ArgumentDictionaryTy =
/// In Glow, the activations are quantized to int_8. Therefore, for the offset
/// read from quantized caffe2 model, we need to subtract 128 (i.e. INT8_MIN)
/// to make the activations become int8_t.
-/// For Glow: -127 <= orig_fp32/scale_1 + offset_1 < 128
-/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 < 255
+/// For Glow: -128 <= orig_fp32/scale_1 + offset_1 <= 127
+/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 <= 255
/// Therefore, we can make scale_1 == scale_2, and offset_1 = offset_2 - 128
const int32_t OFFSETSHIFT = 128;
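For reference, a minimal self-contained sketch of the arithmetic this comment describes (illustrative values only; `OFFSETSHIFT` and the scale/zero-point convention are as defined above):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int32_t OFFSETSHIFT = 128;

  // Suppose caffe2 stored the uint8 value 200 with scale 0.5 and
  // offset (zero point) 128, i.e. real value (200 - 128) * 0.5 = 36.0.
  const uint8_t c2Value = 200;
  const int32_t c2Offset = 128;
  const float scale = 0.5f; // scale_1 == scale_2

  // Glow keeps the scale and shifts both the data and the offset by 128.
  const int8_t glowValue = static_cast<int8_t>(c2Value - OFFSETSHIFT); // 72
  const int32_t glowOffset = c2Offset - OFFSETSHIFT;                   // 0

  // The dequantized real value is unchanged by the shift.
  assert((c2Value - c2Offset) * scale == (glowValue - glowOffset) * scale);
  return 0;
}
```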

@@ -60,7 +60,6 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) {
}
dim.push_back(d);
}

Contributor: sorry, not your change, but line 46 does not have a correct comment. Could you fix [-128, 127] and <= 255?

Contributor Author: I don't mind having one more line of credit lol

if (in.data_type() == caffe2::TensorProto::FLOAT) {
T->reset(ElemKind::FloatTy, dim);
return llvm::Error::success();
Expand All @@ -77,6 +76,26 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) {
RETURN_ERR("Only float and index tensors are supported");
}
}

llvm::Error setTensorType(const caffe2::QTensorProto &in, Tensor *T) {
std::vector<size_t> dim;
for (auto d : in.dims()) {
if (d == 0) {
RETURN_ERR("0 dimemsion qtensor is not supported");
}
dim.push_back(d);
}

if (in.data_type() == caffe2::TensorProto::UINT8) {
T->reset(ElemKind::Int8QTy, dim, in.scale(), in.bias() - OFFSETSHIFT);
Contributor: what is the use case for what's checked in line 72?

    else if (in.data_type() == caffe2::TensorProto::UINT8) {
        T->reset(ElemKind::Int8QTy, dim, 1.0, 0);

Contributor Author: @rdzhabarov These are two different branches. In #72 we assume the incoming tensor is a non-quantized tensor, and only use Int8QTy to represent an int8 tensor. Here we know it is a quantized tensor (because the protobuf is a QTensorProto), so we treat it like a real quantized tensor.

Contributor: the linked PR seems to be unrelated. What is the case when a tensor is int8 and non-quantized?

Contributor Author: oh, it is not the PR I meant to link; it is the 72nd line in this file... I think you have a point: right now glow takes all int8 input as Int8QTy. Do we have a normal int8 ty as well?

return llvm::Error::success();
} else if (in.data_type() == caffe2::TensorProto::INT32) {
T->reset(ElemKind::Int32QTy, dim, in.scale(), in.bias());
return llvm::Error::success();
} else {
RETURN_ERR("Only uint8 and int32 qtensors are supported");
}
}
} // namespace
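A hedged sketch of the producer side, assuming the standard protobuf-generated accessors for `caffe2::QTensorProto` (`add_dims`, `set_data_type`, `set_scale`, `set_bias`); this is how a proto that `setTensorType` accepts might be populated.

```cpp
// Sketch only: a 2x4 uint8 QTensorProto with caffe2-convention
// quantization parameters, as consumed by setTensorType above.
caffe2::QTensorProto qt;
qt.set_name("fc_in");
qt.add_dims(2);
qt.add_dims(4);
qt.set_data_type(caffe2::TensorProto::UINT8);
qt.set_scale(0.1);  // shared with Glow: scale_1 == scale_2
qt.set_bias(128.0); // caffe2 zero point; Glow stores 128 - OFFSETSHIFT = 0

Tensor T;
RETURN_IF_ERR(setTensorType(qt, &T)); // T: Int8QTy, dims {2, 4}, scale 0.1, offset 0
```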

/// Translates the protocol buffer node \p op into a random access map.
@@ -1048,37 +1067,59 @@ llvm::Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
RETURN_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator."));
}

template <class TensorProtoType>
Contributor: nitpick, add a blank line here before template to separate top-level defs.

llvm::Error
Caffe2ModelLoader::loadInputsWithTensorProtoType(const caffe2::NetDef &net,
bool loadInputsAsPlaceholders,
const TensorProtoType &in) {
// Skip static weights
if (tensors_.count(in.name())) {
return llvm::Error::success();
}

if (loadInputsAsPlaceholders) {
Tensor T;
RETURN_IF_ERR(setTensorType(in, &T));

Placeholder *placeholder;
ASSIGN_VALUE_OR_RETURN_ERR(
placeholder, createAndRegisterPlaceholder(in.name(), &T.getType()));
nameToInputVars_.try_emplace(in.name(), placeholder);
} else {
std::unique_ptr<Tensor> T(new Tensor());
RETURN_IF_ERR(setTensorType(in, T.get()));
tensors_[in.name()] = std::move(T);
}
return llvm::Error::success();
}
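The template needs no explicit specialization because both proto types expose `name()` and each has its own `setTensorType` overload, so resolution happens per instantiation. A minimal standalone analogy of that pattern (simplified stand-in types, not Glow's actual classes):

```cpp
#include <iostream>
#include <string>

// Stand-ins for caffe2::TensorProto / caffe2::QTensorProto.
struct TensorProto  { std::string name() const { return "plain"; } };
struct QTensorProto { std::string name() const { return "quantized"; } };

// Overloads play the role of the two setTensorType functions above.
void setTensorType(const TensorProto &)  { std::cout << "float path\n"; }
void setTensorType(const QTensorProto &) { std::cout << "quantized path\n"; }

template <class TensorProtoType>
void loadInput(const TensorProtoType &in) {
  std::cout << "loading " << in.name() << ": ";
  setTensorType(in); // overload chosen at instantiation time
}

int main() {
  loadInput(TensorProto{});  // prints: loading plain: float path
  loadInput(QTensorProto{}); // prints: loading quantized: quantized path
}
```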

llvm::Error Caffe2ModelLoader::loadInputs(const caffe2::NetDef &net,
bool loadInputsAsPlaceholders) {
-  const caffe2::Argument *arg = nullptr;
-  for (auto i = 0, e = net.arg_size(); i < e; ++i) {
+  const caffe2::Argument *arg = nullptr, *qarg = nullptr;
+  for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) {
     if (net.arg(i).name() == "input_shape_info") {
       arg = &net.arg(i);
-      break;
+    } else if (net.arg(i).name() == "input_qshape_info") {
+      qarg = &net.arg(i);
     }
   }

+  // Load all regular tensor input
   if (arg) {
     for (const auto &in : arg->tensors()) {
-      // Skip static weights
-      if (tensors_.count(in.name())) {
-        continue;
-      }
-
-      if (loadInputsAsPlaceholders) {
-        Tensor T;
-        RETURN_IF_ERR(setTensorType(in, &T));
-
-        Placeholder *placeholder;
-        ASSIGN_VALUE_OR_RETURN_ERR(
-            placeholder, createAndRegisterPlaceholder(in.name(), &T.getType()));
-        nameToInputVars_.try_emplace(in.name(), placeholder);
-      } else {
-        std::unique_ptr<Tensor> T(new Tensor());
-        RETURN_IF_ERR(setTensorType(in, T.get()));
-        tensors_[in.name()] = std::move(T);
-      }
+      RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>(
+          net, loadInputsAsPlaceholders, in));
     }
   }

+  // Load all quantized tensor input
+  if (qarg) {
+    for (const auto &in : qarg->qtensors()) {
+      RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>(
+          net, loadInputsAsPlaceholders, in));
+    }
+  }

return llvm::Error::success();
}

1 change: 1 addition & 0 deletions lib/Importer/caffe2.proto
@@ -163,6 +163,7 @@ message Argument {
repeated bytes strings = 7;
repeated TensorProto tensors = 11;
repeated NetDef nets = 9;
repeated QTensorProto qtensors = 12;
}
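A hedged sketch of a producer writing this new field, assuming the generated protobuf accessors (`add_arg`, `add_qtensors`, `set_*`); the argument name follows the `input_qshape_info` convention read by `loadInputs` above.

```cpp
// Sketch only: attach quantized input shape info to a NetDef through the
// new repeated qtensors field.
caffe2::NetDef net;
caffe2::Argument *qarg = net.add_arg();
qarg->set_name("input_qshape_info");

caffe2::QTensorProto *qt = qarg->add_qtensors();
qt->set_name("data");
qt->add_dims(1);
qt->add_dims(224);
qt->set_data_type(caffe2::TensorProto::UINT8);
qt->set_scale(0.5);
qt->set_bias(128.0);
```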

// DeviceType that Caffe2 currently supports.
2 changes: 1 addition & 1 deletion thirdparty/foxi
Submodule foxi updated 1 file
+16 −0 foxi/onnxifi.h