Add a command line parameter in benchmark_tool to set the XNNPack cache file.

PiperOrigin-RevId: 634470123
qukhan authored and tensorflower-gardener committed May 16, 2024
1 parent 8d97018 commit 62abb36
Showing 11 changed files with 294 additions and 14 deletions.
@@ -317,6 +317,10 @@ enum XNNPackFlags {
message XNNPackSettings {
optional int32 num_threads = 1;
optional XNNPackFlags flags = 2 [default = TFLITE_XNNPACK_DELEGATE_NO_FLAGS];
// Path to the experimental XNNPack cache file. XNNPack packed buffers are
// saved to and reloaded from this cache, which can reduce initialization time
// and the packing memory footprint.
optional string experimental_weight_cache_file_path = 3;
}
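
For illustration only (not part of this diff): with the standard C++ protobuf bindings generated from this file, the new field can be set as in the sketch below. The include path, the xnnpack_settings field on TFLiteSettings (not shown in this hunk), and the cache path are assumptions.

// Sketch, assuming the usual generated C++ protobuf API for this file.
// #include "tensorflow/lite/acceleration/configuration/configuration.pb.h"  // path assumed
tflite::proto::TFLiteSettings MakeExampleXNNPackProtoSettings() {
  tflite::proto::TFLiteSettings settings;
  auto* xnnpack = settings.mutable_xnnpack_settings();  // field name assumed
  xnnpack->set_num_threads(4);
  // Hypothetical cache location; any writable path should do.
  xnnpack->set_experimental_weight_cache_file_path("/tmp/xnnpack_weights.cache");
  return settings;
}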

// CoreML Delegate settings.
@@ -1692,25 +1692,32 @@ struct XNNPackSettingsT : public ::flatbuffers::NativeTable {
typedef XNNPackSettings TableType;
int32_t num_threads = 0;
tflite::XNNPackFlags flags = tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS;
std::string experimental_weight_cache_file_path{};
};

struct XNNPackSettings FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
typedef XNNPackSettingsT NativeTableType;
typedef XNNPackSettingsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_NUM_THREADS = 4,
VT_FLAGS = 6,
VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH = 8
};
int32_t num_threads() const {
return GetField<int32_t>(VT_NUM_THREADS, 0);
}
tflite::XNNPackFlags flags() const {
return static_cast<tflite::XNNPackFlags>(GetField<int32_t>(VT_FLAGS, 0));
}
const ::flatbuffers::String *experimental_weight_cache_file_path() const {
return GetPointer<const ::flatbuffers::String *>(VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH);
}
bool Verify(::flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int32_t>(verifier, VT_NUM_THREADS, 4) &&
VerifyField<int32_t>(verifier, VT_FLAGS, 4) &&
VerifyOffset(verifier, VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH) &&
verifier.VerifyString(experimental_weight_cache_file_path()) &&
verifier.EndTable();
}
XNNPackSettingsT *UnPack(const ::flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -1728,6 +1735,9 @@ struct XNNPackSettingsBuilder {
void add_flags(tflite::XNNPackFlags flags) {
fbb_.AddElement<int32_t>(XNNPackSettings::VT_FLAGS, static_cast<int32_t>(flags), 0);
}
void add_experimental_weight_cache_file_path(::flatbuffers::Offset<::flatbuffers::String> experimental_weight_cache_file_path) {
fbb_.AddOffset(XNNPackSettings::VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH, experimental_weight_cache_file_path);
}
explicit XNNPackSettingsBuilder(::flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
@@ -1742,13 +1752,28 @@ struct XNNPackSettingsBuilder {
inline ::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettings(
::flatbuffers::FlatBufferBuilder &_fbb,
int32_t num_threads = 0,
tflite::XNNPackFlags flags = tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS,
::flatbuffers::Offset<::flatbuffers::String> experimental_weight_cache_file_path = 0) {
XNNPackSettingsBuilder builder_(_fbb);
builder_.add_experimental_weight_cache_file_path(experimental_weight_cache_file_path);
builder_.add_flags(flags);
builder_.add_num_threads(num_threads);
return builder_.Finish();
}

inline ::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettingsDirect(
::flatbuffers::FlatBufferBuilder &_fbb,
int32_t num_threads = 0,
tflite::XNNPackFlags flags = tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS,
const char *experimental_weight_cache_file_path = nullptr) {
auto experimental_weight_cache_file_path__ = experimental_weight_cache_file_path ? _fbb.CreateString(experimental_weight_cache_file_path) : 0;
return tflite::CreateXNNPackSettings(
_fbb,
num_threads,
flags,
experimental_weight_cache_file_path__);
}
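
A minimal usage sketch of the Direct helper above — not part of the generated file; the cache path is a hypothetical example:

// Sketch: builds an XNNPackSettings table carrying the new weight-cache path.
// Assumes the surrounding generated header is already included.
inline ::flatbuffers::Offset<tflite::XNNPackSettings> BuildExampleXNNPackSettings(
    ::flatbuffers::FlatBufferBuilder &fbb) {
  return tflite::CreateXNNPackSettingsDirect(
      fbb,
      /*num_threads=*/4,
      tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS,
      /*experimental_weight_cache_file_path=*/"/tmp/xnnpack_weights.cache");
}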

::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettings(::flatbuffers::FlatBufferBuilder &_fbb, const XNNPackSettingsT *_o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr);

struct CoreMLSettingsT : public ::flatbuffers::NativeTable {
@@ -4911,7 +4936,8 @@ inline ::flatbuffers::Offset<HexagonSettings> CreateHexagonSettings(::flatbuffer
inline bool operator==(const XNNPackSettingsT &lhs, const XNNPackSettingsT &rhs) {
return
(lhs.num_threads == rhs.num_threads) &&
(lhs.flags == rhs.flags) &&
(lhs.experimental_weight_cache_file_path == rhs.experimental_weight_cache_file_path);
}

inline bool operator!=(const XNNPackSettingsT &lhs, const XNNPackSettingsT &rhs) {
@@ -4930,6 +4956,7 @@ inline void XNNPackSettings::UnPackTo(XNNPackSettingsT *_o, const ::flatbuffers:
(void)_resolver;
{ auto _e = num_threads(); _o->num_threads = _e; }
{ auto _e = flags(); _o->flags = _e; }
{ auto _e = experimental_weight_cache_file_path(); if (_e) _o->experimental_weight_cache_file_path = _e->str(); }
}

inline ::flatbuffers::Offset<XNNPackSettings> XNNPackSettings::Pack(::flatbuffers::FlatBufferBuilder &_fbb, const XNNPackSettingsT* _o, const ::flatbuffers::rehasher_function_t *_rehasher) {
@@ -4942,10 +4969,12 @@ inline ::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettings(::flatbuffer
struct _VectorArgs { ::flatbuffers::FlatBufferBuilder *__fbb; const XNNPackSettingsT* __o; const ::flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
auto _num_threads = _o->num_threads;
auto _flags = _o->flags;
auto _experimental_weight_cache_file_path = _o->experimental_weight_cache_file_path.empty() ? 0 : _fbb.CreateString(_o->experimental_weight_cache_file_path);
return tflite::CreateXNNPackSettings(
_fbb,
_num_threads,
_flags,
_experimental_weight_cache_file_path);
}


@@ -97,6 +97,8 @@ enum Delegate {
CORE_ML = 7;
// Arm NN Delegate.
ARMNN = 8;
// MediaTek Neuron Delegate.
MTK_NEURON = 9;
}

enum NNAPIExecutionPreference {
@@ -662,6 +664,111 @@ message ArmNNSettings {
optional string additional_parameters = 3;
}

// MediaTek Neuron Delegate Settings.
// See https://neuropilot.mediatek.com/ for more information.
message MtkNeuronSettings {
enum ExecutionPreference {
PREFERENCE_UNDEFINED = 0;

// Prefer execution in a power-efficient mode, optimizing for low power
// consumption.
PREFERENCE_LOW_POWER = 1;

// Prefer execution that provides shorter single-shot latency, optimizing
// for fast response times.
PREFERENCE_FAST_SINGLE_ANSWER = 2;

// Prefer execution that provides sustained speed for continuous operation
// and higher throughput, optimizing for overall performance in ongoing or
// repetitive tasks.
PREFERENCE_SUSTAINED_SPEED = 3;

// Prefer execution in the turbo boost mode, which may boost the frequencies
// of APU and other system components such as CPU and DRAM, to achieve
// maximum performance. If boosting is not supported in the underlying
// system, it falls back to the behavior of PREFERENCE_FAST_SINGLE_ANSWER.
PREFERENCE_TURBO_BOOST = 4;
}

enum ExecutionPriority {
PRIORITY_UNDEFINED = 0;
PRIORITY_LOW = 90;
PRIORITY_MEDIUM = 100;
PRIORITY_HIGH = 110;
}

enum OptimizationHint {
OPTIMIZATION_NONE = 0;

// Optimization hint for reducing latency. This hint may distribute the
// workload across multiple APU cores in the compiled model to achieve
// faster execution.
OPTIMIZATION_LOW_LATENCY = 1;

// Optimization hint for reducing DRAM access and minimizing memory
// bandwidth usage through kernel fusion and data fusion techniques.
OPTIMIZATION_DEEP_FUSION = 2;

// Optimization hint for processing multiple input samples in parallel
// across available APU cores in the batch dimension. This optimization is
// effective for models with a batch size greater than 1.
OPTIMIZATION_BATCH_PROCESSING = 3;
}

// How to check the operator compatibility with the underlying accelerator.
enum OperationCheckMode {
NO_OPERATION_CHECK = 0;

// Checks each node separately with multiple queries to the backend.
PER_NODE_OPERATION_CHECK = 1;

// Checks all nodes in the graph at once with a batched query to the
// backend.
PRE_OPERATION_CHECK = 2;
}

// The preferred execution mode. The system-wide default will be used when
// PREFERENCE_UNDEFINED is passed to the delegate.
optional ExecutionPreference execution_preference = 1;

// The execution priority of the inference request. The system-wide default
// will be used when PRIORITY_UNDEFINED is passed to the delegate.
optional ExecutionPriority execution_priority = 2;

// The optimization hints that will instruct the model compiler.
repeated OptimizationHint optimization_hints = 3 [packed = true];

// Whether and how to check the operator compatibility with the underlying
// accelerator.
optional OperationCheckMode operation_check_mode = 4;

// Whether to allow the accelerator to optionally use lower-precision FP16
// arithmetic when performing calculations on FP32 data.
optional bool allow_fp16_precision_for_fp32 = 5;

// Whether to use AHardwareBuffer_* API to manage buffers. Requires Android
// API level >= 26, or a dedicated AHardwareBuffer API shim on non-Android
// platforms.
optional bool use_ahwb = 6;

// Whether to use cacheable (consistent / coherent) memory. This will affect
// both buffer allocation and buffer importing behaviors.
optional bool use_cacheable_buffer = 7 [default = true];

// Extra options for the Neuron compiler, such as "--opt-bw".
// See docs at https://neuropilot.mediatek.com/ for available options.
repeated string compile_options = 8;

// Optional list of target accelerator device names.
// If empty, the delegate will automatically select the accelerator.
// See docs at https://neuropilot.mediatek.com/ for available accelerators.
repeated string accelerator_names = 9;

// Optional path to the platform-dependent Neuron configuration file.
// See docs at https://neuropilot.mediatek.com/ for more details.
optional string neuron_config_path = 10;
}
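
For illustration (not in this diff), configuring the new message through the generated C++ protobuf API might look like the sketch below; it assumes standard protobuf bindings for this file, and the chosen values are arbitrary examples.

// Sketch, assuming the usual generated C++ protobuf API for this file.
tflite::proto::MtkNeuronSettings MakeExampleNeuronSettings() {
  tflite::proto::MtkNeuronSettings neuron;
  neuron.set_execution_preference(
      tflite::proto::MtkNeuronSettings::PREFERENCE_SUSTAINED_SPEED);
  neuron.set_execution_priority(
      tflite::proto::MtkNeuronSettings::PRIORITY_HIGH);
  neuron.add_optimization_hints(
      tflite::proto::MtkNeuronSettings::OPTIMIZATION_LOW_LATENCY);
  // "--opt-bw" is the compiler option mentioned in the field comment above.
  neuron.add_compile_options("--opt-bw");
  return neuron;
}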

// How to configure TFLite.
message TFLiteSettings {
// Which delegate to use.
@@ -719,6 +826,9 @@ message TFLiteSettings {

// For configuring the Arm NN delegate.
optional ArmNNSettings armnn_settings = 16;

// For configuring the MediaTek Neuron delegate.
optional MtkNeuronSettings mtk_neuron_settings = 17;
}

// Whether to automatically fallback to TFLite CPU path on delegation errors.
@@ -39,6 +39,10 @@ static TfLiteDelegate* CreateDelegate(const void* settings) {
if (xnnpack_settings->flags()) {
options.flags = xnnpack_settings->flags();
}
if (xnnpack_settings->experimental_weight_cache_file_path()) {
options.experimental_weight_cache_file_path =
xnnpack_settings->experimental_weight_cache_file_path()->c_str();
}
}
return TfLiteXNNPackDelegateCreate(&options);
}
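
Note (reviewer observation, not from the commit): c_str() here points into the settings flatbuffer, so that buffer presumably must stay alive at least until TfLiteXNNPackDelegateCreate returns; whether the delegate copies the path or opens the cache file immediately is not shown in this hunk.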
14 changes: 14 additions & 0 deletions tensorflow/lite/tools/delegates/BUILD
Original file line number Diff line number Diff line change
@@ -173,6 +173,20 @@ cc_library_with_tflite(
alwayslink = 1,
)

cc_test(
name = "xnnpack_delegate_provider_test",
srcs = ["xnnpack_delegate_provider_test.cc"],
copts = tflite_copts(),
visibility = ["//visibility:public"],
deps = [
":delegate_provider_hdr",
":xnnpack_delegate_provider",
"//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
"//tensorflow/lite/tools:tool_params",
"@com_google_googletest//:gtest_main",
],
)
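
Given the BUILD file path above, the new test can presumably be run with `bazel test //tensorflow/lite/tools/delegates:xnnpack_delegate_provider_test`.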

cc_library(
name = "external_delegate_provider",
srcs = ["external_delegate_provider.cc"],
11 changes: 10 additions & 1 deletion tensorflow/lite/tools/delegates/xnnpack_delegate_provider.cc
@@ -27,6 +27,8 @@ class XnnpackDelegateProvider : public DelegateProvider {
default_params_.AddParam("use_xnnpack", ToolParam::Create<bool>(false));
default_params_.AddParam("xnnpack_force_fp16",
ToolParam::Create<bool>(false));
default_params_.AddParam("xnnpack_experimental_weight_cache_file_path",
ToolParam::Create<std::string>(""));
}

std::vector<Flag> CreateFlags(ToolParams* params) const final;
@@ -54,6 +56,8 @@ std::vector<Flag> XnnpackDelegateProvider::CreateFlags(
"false explicitly."),
CreateFlag<bool>("xnnpack_force_fp16", params,
"enforce float16 inference."),
CreateFlag<std::string>("xnnpack_experimental_weight_cache_file_path",
params, "enable file-backed weight caching."),
};
return flags;
}
@@ -63,14 +67,19 @@ void XnnpackDelegateProvider::LogParams(const ToolParams& params,
LOG_TOOL_PARAM(params, bool, "use_xnnpack", "Use xnnpack", verbose);
LOG_TOOL_PARAM(params, bool, "xnnpack_force_fp16", "xnnpack_force_fp16",
verbose);
LOG_TOOL_PARAM(params, std::string,
"xnnpack_experimental_weight_cache_file_path",
"xnnpack_experimental_weight_cache_file_path", verbose);
}

TfLiteDelegatePtr XnnpackDelegateProvider::CreateTfLiteDelegate(
const ToolParams& params) const {
if (params.Get<bool>("use_xnnpack")) {
return evaluation::CreateXNNPACKDelegate(
params.Get<int32_t>("num_threads"),
params.Get<bool>("xnnpack_force_fp16"));
params.Get<bool>("xnnpack_force_fp16"),
params.Get<std::string>("xnnpack_experimental_weight_cache_file_path")
.c_str());
}
return CreateNullDelegate();
}
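
With these pieces in place, a typical invocation (illustrative model and cache paths) would be `benchmark_model --graph=model.tflite --use_xnnpack=true --xnnpack_experimental_weight_cache_file_path=/tmp/xnnpack_weights.cache`. When the flag is left at its empty default, the c_str() above yields an empty string rather than a null pointer, so CreateXNNPACKDelegate presumably treats an empty path as "no cache".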