Add a command line parameter in benchmark_tool to set the XNNPack cache file.

PiperOrigin-RevId: 634470123
qukhan authored and tensorflower-gardener committed May 16, 2024
1 parent 8d97018 commit 62abb36
Showing 11 changed files with 294 additions and 14 deletions.
@@ -317,6 +317,10 @@ enum XNNPackFlags {
message XNNPackSettings {
optional int32 num_threads = 1;
optional XNNPackFlags flags = 2 [default = TFLITE_XNNPACK_DELEGATE_NO_FLAGS];
// Path to the experimental XNNPack cache file. XNNPack packed buffers are
// saved to and reloaded from this cache, which can reduce initialization time
// and the packing memory footprint.
optional string experimental_weight_cache_file_path = 3;
}
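
For illustration only (not part of this diff): with the standard C++ protobuf bindings generated from this file, the new field can be set as in the sketch below. The include path, the xnnpack_settings field on TFLiteSettings (not shown in this hunk), and the cache path are assumptions.

// Sketch, assuming the usual generated C++ protobuf API for this file.
// #include "tensorflow/lite/acceleration/configuration/configuration.pb.h"  // path assumed
tflite::proto::TFLiteSettings MakeExampleXNNPackProtoSettings() {
  tflite::proto::TFLiteSettings settings;
  auto* xnnpack = settings.mutable_xnnpack_settings();  // field name assumed
  xnnpack->set_num_threads(4);
  // Hypothetical cache location; any writable path should do.
  xnnpack->set_experimental_weight_cache_file_path("/tmp/xnnpack_weights.cache");
  return settings;
}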

// CoreML Delegate settings.
@@ -1692,25 +1692,32 @@ struct XNNPackSettingsT : public ::flatbuffers::NativeTable {
typedef XNNPackSettings TableType;
int32_t num_threads = 0;
tflite::XNNPackFlags flags = tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS;
std::string experimental_weight_cache_file_path{};
};

struct XNNPackSettings FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
typedef XNNPackSettingsT NativeTableType;
typedef XNNPackSettingsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_NUM_THREADS = 4,
VT_FLAGS = 6,
VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH = 8
};
int32_t num_threads() const {
return GetField<int32_t>(VT_NUM_THREADS, 0);
}
tflite::XNNPackFlags flags() const {
return static_cast<tflite::XNNPackFlags>(GetField<int32_t>(VT_FLAGS, 0));
}
const ::flatbuffers::String *experimental_weight_cache_file_path() const {
return GetPointer<const ::flatbuffers::String *>(VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH);
}
bool Verify(::flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int32_t>(verifier, VT_NUM_THREADS, 4) &&
VerifyField<int32_t>(verifier, VT_FLAGS, 4) &&
VerifyOffset(verifier, VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH) &&
verifier.VerifyString(experimental_weight_cache_file_path()) &&
verifier.EndTable();
}
XNNPackSettingsT *UnPack(const ::flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -1728,6 +1735,9 @@ struct XNNPackSettingsBuilder {
void add_flags(tflite::XNNPackFlags flags) {
fbb_.AddElement<int32_t>(XNNPackSettings::VT_FLAGS, static_cast<int32_t>(flags), 0);
}
void add_experimental_weight_cache_file_path(::flatbuffers::Offset<::flatbuffers::String> experimental_weight_cache_file_path) {
fbb_.AddOffset(XNNPackSettings::VT_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH, experimental_weight_cache_file_path);
}
explicit XNNPackSettingsBuilder(::flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
@@ -1742,13 +1752,28 @@ struct XNNPackSettingsBuilder {
inline ::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettings(
::flatbuffers::FlatBufferBuilder &_fbb,
int32_t num_threads = 0,
tflite::XNNPackFlags flags = tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS,
::flatbuffers::Offset<::flatbuffers::String> experimental_weight_cache_file_path = 0) {
XNNPackSettingsBuilder builder_(_fbb);
builder_.add_experimental_weight_cache_file_path(experimental_weight_cache_file_path);
builder_.add_flags(flags);
builder_.add_num_threads(num_threads);
return builder_.Finish();
}

inline ::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettingsDirect(
::flatbuffers::FlatBufferBuilder &_fbb,
int32_t num_threads = 0,
tflite::XNNPackFlags flags = tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS,
const char *experimental_weight_cache_file_path = nullptr) {
auto experimental_weight_cache_file_path__ = experimental_weight_cache_file_path ? _fbb.CreateString(experimental_weight_cache_file_path) : 0;
return tflite::CreateXNNPackSettings(
_fbb,
num_threads,
flags,
experimental_weight_cache_file_path__);
}
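
A minimal usage sketch of the Direct helper above — not part of the generated file; the cache path is a hypothetical example:

// Sketch: builds an XNNPackSettings table carrying the new weight-cache path.
// Assumes the surrounding generated header is already included.
inline ::flatbuffers::Offset<tflite::XNNPackSettings> BuildExampleXNNPackSettings(
    ::flatbuffers::FlatBufferBuilder &fbb) {
  return tflite::CreateXNNPackSettingsDirect(
      fbb,
      /*num_threads=*/4,
      tflite::XNNPackFlags_TFLITE_XNNPACK_DELEGATE_NO_FLAGS,
      /*experimental_weight_cache_file_path=*/"/tmp/xnnpack_weights.cache");
}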

::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettings(::flatbuffers::FlatBufferBuilder &_fbb, const XNNPackSettingsT *_o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr);

struct CoreMLSettingsT : public ::flatbuffers::NativeTable {
@@ -4911,7 +4936,8 @@ inline ::flatbuffers::Offset<HexagonSettings> CreateHexagonSettings(::flatbuffer
inline bool operator==(const XNNPackSettingsT &lhs, const XNNPackSettingsT &rhs) {
return
(lhs.num_threads == rhs.num_threads) &&
(lhs.flags == rhs.flags) &&
(lhs.experimental_weight_cache_file_path == rhs.experimental_weight_cache_file_path);
}

inline bool operator!=(const XNNPackSettingsT &lhs, const XNNPackSettingsT &rhs) {
@@ -4930,6 +4956,7 @@ inline void XNNPackSettings::UnPackTo(XNNPackSettingsT *_o, const ::flatbuffers:
(void)_resolver;
{ auto _e = num_threads(); _o->num_threads = _e; }
{ auto _e = flags(); _o->flags = _e; }
{ auto _e = experimental_weight_cache_file_path(); if (_e) _o->experimental_weight_cache_file_path = _e->str(); }
}

inline ::flatbuffers::Offset<XNNPackSettings> XNNPackSettings::Pack(::flatbuffers::FlatBufferBuilder &_fbb, const XNNPackSettingsT* _o, const ::flatbuffers::rehasher_function_t *_rehasher) {
@@ -4942,10 +4969,12 @@ inline ::flatbuffers::Offset<XNNPackSettings> CreateXNNPackSettings(::flatbuffer
struct _VectorArgs { ::flatbuffers::FlatBufferBuilder *__fbb; const XNNPackSettingsT* __o; const ::flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
auto _num_threads = _o->num_threads;
auto _flags = _o->flags;
auto _experimental_weight_cache_file_path = _o->experimental_weight_cache_file_path.empty() ? 0 : _fbb.CreateString(_o->experimental_weight_cache_file_path);
return tflite::CreateXNNPackSettings(
_fbb,
_num_threads,
_flags,
_experimental_weight_cache_file_path);
}


@@ -97,6 +97,8 @@ enum Delegate {
CORE_ML = 7;
// Arm NN Delegate.
ARMNN = 8;
// MediaTek Neuron Delegate.
MTK_NEURON = 9;
}

enum NNAPIExecutionPreference {
@@ -662,6 +664,111 @@ message ArmNNSettings {
optional string additional_parameters = 3;
}

// MediaTek Neuron Delegate Settings.
// See https://neuropilot.mediatek.com/ for more information.
message MtkNeuronSettings {
enum ExecutionPreference {
PREFERENCE_UNDEFINED = 0;

// Prefer execution in a power-efficient mode, optimizing for low power
// consumption.
PREFERENCE_LOW_POWER = 1;

// Prefer execution that provides shorter single-shot latency, optimizing
// for fast response times.
PREFERENCE_FAST_SINGLE_ANSWER = 2;

// Prefer execution that provides sustained speed for continuous operation
// and higher throughput, optimizing for overall performance in ongoing or
// repetitive tasks.
PREFERENCE_SUSTAINED_SPEED = 3;

// Prefer execution in the turbo boost mode, which may boost the frequencies
// of APU and other system components such as CPU and DRAM, to achieve
// maximum performance. If boosting is not supported in the underlying
// system, it falls back to the behavior of PREFERENCE_FAST_SINGLE_ANSWER.
PREFERENCE_TURBO_BOOST = 4;
}

enum ExecutionPriority {
PRIORITY_UNDEFINED = 0;
PRIORITY_LOW = 90;
PRIORITY_MEDIUM = 100;
PRIORITY_HIGH = 110;
}

enum OptimizationHint {
OPTIMIZATION_NONE = 0;

// Optimization hint for reducing latency. This hint may distribute the
// workload across multiple APU cores in the compiled model to achieve
// faster execution.
OPTIMIZATION_LOW_LATENCY = 1;

// Optimization hint for reducing DRAM access and minimizing memory
// bandwidth usage through kernel fusion and data fusion techniques.
OPTIMIZATION_DEEP_FUSION = 2;

// Optimization hint for processing multiple input samples in parallel
// across available APU cores in the batch dimension. This optimization is
// effective for models with a batch size greater than 1.
OPTIMIZATION_BATCH_PROCESSING = 3;
}

// How to check the operator compatibility with the underlying accelerator.
enum OperationCheckMode {
NO_OPERATION_CHECK = 0;

// Checks each node separately with multiple queries to the backend.
PER_NODE_OPERATION_CHECK = 1;

// Checks all nodes in the graph at once with a batched query to the
// backend.
PRE_OPERATION_CHECK = 2;
}

// The preferred execution mode. The system-wide default will be used when
// PREFERENCE_UNDEFINED is passed to the delegate.
optional ExecutionPreference execution_preference = 1;

// The execution priority of the inference request. The system-wide default
// will be used when PRIORITY_UNDEFINED is passed to the delegate.
optional ExecutionPriority execution_priority = 2;

// The optimization hints that will instruct the model compiler.
repeated OptimizationHint optimization_hints = 3 [packed = true];

// Whether and how to check the operator compatibility with the underlying
// accelerator.
optional OperationCheckMode operation_check_mode = 4;

// Whether to allow the accelerator to optionally use lower-precision FP16
// arithmetic when performing calculations on FP32 data.
optional bool allow_fp16_precision_for_fp32 = 5;

// Whether to use AHardwareBuffer_* API to manage buffers. Requires Android
// API level >= 26, or a dedicated AHardwareBuffer API shim on non-Android
// platforms.
optional bool use_ahwb = 6;

// Whether to use cacheable (consistent / coherent) memory. This will affect
// both buffer allocation and buffer importing behaviors.
optional bool use_cacheable_buffer = 7 [default = true];

// Extra options for the Neuron compiler, such as "--opt-bw".
// See docs at https://neuropilot.mediatek.com/ for available options.
repeated string compile_options = 8;

// Optional list of target accelerator device names.
// If empty, the delegate will automatically select the accelerator.
// See docs at https://neuropilot.mediatek.com/ for available accelerators.
repeated string accelerator_names = 9;

// Optional path to the platform-dependent Neuron configuration file.
// See docs at https://neuropilot.mediatek.com/ for more details.
optional string neuron_config_path = 10;
}
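
For illustration (not in this diff), configuring the new message through the generated C++ protobuf API might look like the sketch below; it assumes standard protobuf bindings for this file, and the chosen values are arbitrary examples.

// Sketch, assuming the usual generated C++ protobuf API for this file.
tflite::proto::MtkNeuronSettings MakeExampleNeuronSettings() {
  tflite::proto::MtkNeuronSettings neuron;
  neuron.set_execution_preference(
      tflite::proto::MtkNeuronSettings::PREFERENCE_SUSTAINED_SPEED);
  neuron.set_execution_priority(
      tflite::proto::MtkNeuronSettings::PRIORITY_HIGH);
  neuron.add_optimization_hints(
      tflite::proto::MtkNeuronSettings::OPTIMIZATION_LOW_LATENCY);
  // "--opt-bw" is the compiler option mentioned in the field comment above.
  neuron.add_compile_options("--opt-bw");
  return neuron;
}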

// How to configure TFLite.
message TFLiteSettings {
// Which delegate to use.
@@ -719,6 +826,9 @@ message TFLiteSettings {

// For configuring the Arm NN delegate.
optional ArmNNSettings armnn_settings = 16;

// For configuring the MediaTek Neuron delegate.
optional MtkNeuronSettings mtk_neuron_settings = 17;
}

// Whether to automatically fallback to TFLite CPU path on delegation errors.
@@ -39,6 +39,10 @@ static TfLiteDelegate* CreateDelegate(const void* settings) {
if (xnnpack_settings->flags()) {
options.flags = xnnpack_settings->flags();
}
if (xnnpack_settings->experimental_weight_cache_file_path()) {
options.experimental_weight_cache_file_path =
xnnpack_settings->experimental_weight_cache_file_path()->c_str();
}
}
return TfLiteXNNPackDelegateCreate(&options);
}
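
Note (reviewer observation, not from the commit): c_str() here points into the settings flatbuffer, so that buffer presumably must stay alive at least until TfLiteXNNPackDelegateCreate returns; whether the delegate copies the path or opens the cache file immediately is not shown in this hunk.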
14 changes: 14 additions & 0 deletions tensorflow/lite/tools/delegates/BUILD
Original file line number Diff line number Diff line change
@@ -173,6 +173,20 @@ cc_library_with_tflite(
alwayslink = 1,
)

cc_test(
name = "xnnpack_delegate_provider_test",
srcs = ["xnnpack_delegate_provider_test.cc"],
copts = tflite_copts(),
visibility = ["//visibility:public"],
deps = [
":delegate_provider_hdr",
":xnnpack_delegate_provider",
"//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
"//tensorflow/lite/tools:tool_params",
"@com_google_googletest//:gtest_main",
],
)
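
Given the BUILD file path above, the new test can presumably be run with `bazel test //tensorflow/lite/tools/delegates:xnnpack_delegate_provider_test`.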

cc_library(
name = "external_delegate_provider",
srcs = ["external_delegate_provider.cc"],
11 changes: 10 additions & 1 deletion tensorflow/lite/tools/delegates/xnnpack_delegate_provider.cc
@@ -27,6 +27,8 @@ class XnnpackDelegateProvider : public DelegateProvider {
default_params_.AddParam("use_xnnpack", ToolParam::Create<bool>(false));
default_params_.AddParam("xnnpack_force_fp16",
ToolParam::Create<bool>(false));
default_params_.AddParam("xnnpack_experimental_weight_cache_file_path",
ToolParam::Create<std::string>(""));
}

std::vector<Flag> CreateFlags(ToolParams* params) const final;
@@ -54,6 +56,8 @@ std::vector<Flag> XnnpackDelegateProvider::CreateFlags(
"false explicitly."),
CreateFlag<bool>("xnnpack_force_fp16", params,
"enforce float16 inference."),
CreateFlag<std::string>("xnnpack_experimental_weight_cache_file_path",
params, "enable file-backed weight caching."),
};
return flags;
}
@@ -63,14 +67,19 @@ void XnnpackDelegateProvider::LogParams(const ToolParams& params,
LOG_TOOL_PARAM(params, bool, "use_xnnpack", "Use xnnpack", verbose);
LOG_TOOL_PARAM(params, bool, "xnnpack_force_fp16", "xnnpack_force_fp16",
verbose);
LOG_TOOL_PARAM(params, std::string,
"xnnpack_experimental_weight_cache_file_path",
"xnnpack_experimental_weight_cache_file_path", verbose);
}

TfLiteDelegatePtr XnnpackDelegateProvider::CreateTfLiteDelegate(
const ToolParams& params) const {
if (params.Get<bool>("use_xnnpack")) {
return evaluation::CreateXNNPACKDelegate(
params.Get<int32_t>("num_threads"),
params.Get<bool>("xnnpack_force_fp16"));
params.Get<bool>("xnnpack_force_fp16"),
params.Get<std::string>("xnnpack_experimental_weight_cache_file_path")
.c_str());
}
return CreateNullDelegate();
}
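
With these pieces in place, a typical invocation (illustrative model and cache paths) would be `benchmark_model --graph=model.tflite --use_xnnpack=true --xnnpack_experimental_weight_cache_file_path=/tmp/xnnpack_weights.cache`. When the flag is left at its empty default, the c_str() above yields an empty string rather than a null pointer, so CreateXNNPACKDelegate presumably treats an empty path as "no cache".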