pytorch · rdzhabarov · Mar 29, 2019 · Mar 28, 2019 · Mar 29, 2019 · Mar 29, 2019
diff --git a/include/glow/Importer/Caffe2ModelLoader.h b/include/glow/Importer/Caffe2ModelLoader.h
@@ -67,6 +67,14 @@ class Caffe2ModelLoader
   /// Mapping between Caffe2 tensor names for inputs and actual Glow input vars.
   llvm::StringMap<Placeholder *> nameToInputVars_;
 
+  /// Called by loadInputs for every tensor listed in its target arguments.
+  /// Two tensor proto types are currently supported:
+  /// caffe2::TensorProto and caffe2::QTensorProto.
+  template <class TensorProtoType>
+  llvm::Error loadInputsWithTensorProtoType(const caffe2::NetDef &net,
+                                            bool loadInputsAsPlaceholders,
+                                            const TensorProtoType &in);
+
   /// Load the inputs from the NetDef. If \p loadInputsAsPlaceholders is
   /// true then this will load each graph input as a placeholder otherwise it
   /// will create an empty tensor for each input.

diff --git a/include/glow/Importer/CommonOperatorLoader.h b/include/glow/Importer/CommonOperatorLoader.h
@@ -40,54 +40,65 @@ inline llvm::Error loadWeight(const onnxTensorDescriptorV1 &in, Tensor *T) {
     RETURN_ERR("Only support CPU memory tensors.");
   }
 
+  // Caffe2 stores quantized data as uint8; subtract 128 to map it to int8.
+  const int32_t OFFSETSHIFT = 128;
   std::vector<size_t> dims;
   for (unsigned i = 0; i < in.dimensions; ++i) {
     dims.push_back(in.shape[i]);
   }
-
-  if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) {
-    T->reset(ElemKind::FloatTy, dims);
-
-    auto TH = T->getHandle<>();
-    float *data = (float *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      TH.raw(i) = data[i];
-    }
-  } else if (in.dataType == ONNXIFI_DATATYPE_UINT64 ||
-             in.dataType == ONNXIFI_DATATYPE_INT64) {
-    const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64;
-    (void)inDataSigned;
-    T->reset(ElemKind::Int64ITy, dims);
-
-    auto TH = T->getHandle<int64_t>();
-    int64_t *data = (int64_t *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      RETURN_ERR_IF_NOT(
-          (inDataSigned || data[i] >= 0),
-          "Disallow overflow of loaded UINT64 data into Int64ITy.");
-      TH.raw(i) = data[i];
-    }
-  } else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
-    T->reset(ElemKind::Int32ITy, dims);
-
-    auto TH = T->getHandle<int32_t>();
-    int32_t *data = (int32_t *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      TH.raw(i) = data[i];
-    }
-  } else if (in.dataType == ONNXIFI_DATATYPE_UINT8) {
-    T->reset(ElemKind::Int8QTy, dims, 1.0, 0);
-
-    auto TH = T->getHandle<int8_t>();
-    uint8_t *data = (uint8_t *)in.buffer;
-    for (size_t i = 0; i < TH.size(); ++i) {
-      constexpr uint8_t OFFSETSHIFT = 128;
-      TH.raw(i) = static_cast<int8_t>((((uint8_t)data[i]) - OFFSETSHIFT));
+  if (in.is_quantized == 1) {
+    if (in.dataType == ONNXIFI_DATATYPE_UINT8) {
+      T->reset(ElemKind::Int8QTy, dims, in.scale, in.bias - OFFSETSHIFT);
+
+      auto TH = T->getHandle<int8_t>();
+      uint8_t *data = (uint8_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = (int8_t)(data[i] - OFFSETSHIFT);
+      }
+    } else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
+      T->reset(ElemKind::Int32QTy, dims, in.scale, in.bias);
+      auto TH = T->getHandle<int32_t>();
+      int32_t *data = (int32_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = data[i];
+      }
+    } else {
+      RETURN_ERR("Only uint8 and int32 quantized tensors are supported.");
     }
   } else {
-    RETURN_ERR("Only float, index, and int8 tensors are supported.");
-  }
+    if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) {
+      T->reset(ElemKind::FloatTy, dims);
+
+      auto TH = T->getHandle<>();
+      float *data = (float *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = data[i];
+      }
+    } else if (in.dataType == ONNXIFI_DATATYPE_UINT64 ||
+               in.dataType == ONNXIFI_DATATYPE_INT64) {
+      const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64;
+      T->reset(ElemKind::Int64ITy, dims);
+
+      auto TH = T->getHandle<int64_t>();
+      int64_t *data = (int64_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        RETURN_ERR_IF_NOT(
+            (inDataSigned || data[i] >= 0),
+            "Disallow overflow of loaded UINT64 data into Int64ITy.");
+        TH.raw(i) = data[i];
+      }
+    } else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
+      T->reset(ElemKind::Int32ITy, dims);
 
+      auto TH = T->getHandle<int32_t>();
+      int32_t *data = (int32_t *)in.buffer;
+      for (size_t i = 0; i < TH.size(); ++i) {
+        TH.raw(i) = data[i];
+      }
+    } else {
+      RETURN_ERR("Only float and index tensors are supported.");
+    }
+  }
   return llvm::Error::success();
 }
 

diff --git a/lib/Importer/Caffe2ModelLoader.cpp b/lib/Importer/Caffe2ModelLoader.cpp
@@ -43,8 +43,8 @@ using ArgumentDictionaryTy =
 /// In Glow, the activations are quantized to int_8. Therefore, for the offset
 /// read from quantized caffe2 model, we need to subtract 128(i.e. INT8_MIN) to
 /// make the activations becomes int8_t.
-/// For Glow: -127 <= orig_fp32/scale_1 + offset_1 < 128
-/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 < 255
+/// For Glow: -128 <= orig_fp32/scale_1 + offset_1 <= 127
+/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 <= 255
 /// Therefore, we can make scale_1 == scale_2, and offset_1 = offset2 - 128
 const int32_t OFFSETSHIFT = 128;
 
@@ -60,7 +60,6 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) {
     }
     dim.push_back(d);
   }
-
   if (in.data_type() == caffe2::TensorProto::FLOAT) {
     T->reset(ElemKind::FloatTy, dim);
     return llvm::Error::success();
@@ -77,6 +76,26 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) {
     RETURN_ERR("Only float and index tensors are supported");
   }
 }
+
+llvm::Error setTensorType(const caffe2::QTensorProto &in, Tensor *T) {
+  std::vector<size_t> dim;
+  for (auto d : in.dims()) {
+    if (d == 0) {
+      RETURN_ERR("0 dimemsion qtensor is not supported");
+    }
+    dim.push_back(d);
+  }
+
+  if (in.data_type() == caffe2::TensorProto::UINT8) {
+    T->reset(ElemKind::Int8QTy, dim, in.scale(), in.bias() - OFFSETSHIFT);
+    return llvm::Error::success();
+  } else if (in.data_type() == caffe2::TensorProto::INT32) {
+    T->reset(ElemKind::Int32QTy, dim, in.scale(), in.bias());
+    return llvm::Error::success();
+  } else {
+    RETURN_ERR("Only uint8 and int32 qtensors are supported");
+  }
+}
 } // namespace
 
 /// Translates the protocol buffer node \p op into a random access map.
@@ -1048,37 +1067,59 @@ llvm::Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
   RETURN_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator."));
 }
 
+template <class TensorProtoType>
+llvm::Error
+Caffe2ModelLoader::loadInputsWithTensorProtoType(const caffe2::NetDef &net,
+                                                 bool loadInputsAsPlaceholders,
+                                                 const TensorProtoType &in) {
+  // Skip static weights
+  if (tensors_.count(in.name())) {
+    return llvm::Error::success();
+  }
+
+  if (loadInputsAsPlaceholders) {
+    Tensor T;
+    RETURN_IF_ERR(setTensorType(in, &T));
+
+    Placeholder *placeholder;
+    ASSIGN_VALUE_OR_RETURN_ERR(
+        placeholder, createAndRegisterPlaceholder(in.name(), &T.getType()));
+    nameToInputVars_.try_emplace(in.name(), placeholder);
+  } else {
+    std::unique_ptr<Tensor> T(new Tensor());
+    RETURN_IF_ERR(setTensorType(in, T.get()));
+    tensors_[in.name()] = std::move(T);
+  }
+  return llvm::Error::success();
+}
+
 llvm::Error Caffe2ModelLoader::loadInputs(const caffe2::NetDef &net,
                                           bool loadInputsAsPlaceholders) {
-  const caffe2::Argument *arg = nullptr;
-  for (auto i = 0, e = net.arg_size(); i < e; ++i) {
+  const caffe2::Argument *arg = nullptr, *qarg = nullptr;
+  for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) {
     if (net.arg(i).name() == "input_shape_info") {
       arg = &net.arg(i);
-      break;
+    } else if (net.arg(i).name() == "input_qshape_info") {
+      qarg = &net.arg(i);
     }
   }
+
+  // Load all regular tensor inputs.
   if (arg) {
     for (const auto &in : arg->tensors()) {
-      // Skip static weights
-      if (tensors_.count(in.name())) {
-        continue;
-      }
-
-      if (loadInputsAsPlaceholders) {
-        Tensor T;
-        RETURN_IF_ERR(setTensorType(in, &T));
+      RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>(
+          net, loadInputsAsPlaceholders, in));
+    }
+  }
 
-        Placeholder *placeholder;
-        ASSIGN_VALUE_OR_RETURN_ERR(
-            placeholder, createAndRegisterPlaceholder(in.name(), &T.getType()));
-        nameToInputVars_.try_emplace(in.name(), placeholder);
-      } else {
-        std::unique_ptr<Tensor> T(new Tensor());
-        RETURN_IF_ERR(setTensorType(in, T.get()));
-        tensors_[in.name()] = std::move(T);
-      }
+  // Load all quantized tensor inputs.
+  if (qarg) {
+    for (const auto &in : qarg->qtensors()) {
+      RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>(
+          net, loadInputsAsPlaceholders, in));
     }
   }
+
   return llvm::Error::success();
 }
 

diff --git a/lib/Importer/caffe2.proto b/lib/Importer/caffe2.proto
@@ -163,6 +163,7 @@ message Argument {
   repeated bytes strings = 7;
   repeated TensorProto tensors = 11;
   repeated NetDef nets = 9;
+  repeated QTensorProto qtensors = 12;
 }
 
 // DeviceType that Caffe2 currently supports.

diff --git a/thirdparty/foxi b/thirdparty/foxi