Merge pull request #772 from khanhlvg:audio-classifier-python-pybinds-dataclasses

PiperOrigin-RevId: 440629195
tensorflow · Apr 10, 2022 · 7772577 · 7772577
2 parents b1f13f0 + c09dede
commit 7772577
Show file tree

Hide file tree

Showing 6 changed files with 401 additions and 0 deletions.
diff --git a/tensorflow_lite_support/python/task/audio/BUILD b/tensorflow_lite_support/python/task/audio/BUILD
@@ -20,3 +20,19 @@ py_library(
         "//tensorflow_lite_support/python/task/processor/proto:embedding_pb2",
     ],
 )
+
+# Python API for the audio classifier task (audio_classifier.py). It depends on
+# the _pywrap_audio_classifier and _pywrap_audio_buffer pybind extensions and
+# on the option/result protos listed below.
+py_library(
+    name = "audio_classifier",
+    srcs = [
+        "audio_classifier.py",
+    ],
+    deps = [
+        "//tensorflow_lite_support/python/task/audio/core:audio_record",
+        "//tensorflow_lite_support/python/task/audio/core:tensor_audio",
+        "//tensorflow_lite_support/python/task/audio/core/pybinds:_pywrap_audio_buffer",
+        "//tensorflow_lite_support/python/task/audio/pybinds:_pywrap_audio_classifier",
+        "//tensorflow_lite_support/python/task/core/proto:base_options_py_pb2",
+        "//tensorflow_lite_support/python/task/processor/proto:classification_options_pb2",
+        "//tensorflow_lite_support/python/task/processor/proto:classifications_pb2",
+    ],
+)
diff --git a/tensorflow_lite_support/python/task/audio/audio_classifier.py b/tensorflow_lite_support/python/task/audio/audio_classifier.py
@@ -0,0 +1,133 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Audio classifier task."""
+
+import dataclasses
+
+from tensorflow_lite_support.python.task.audio.core import audio_record
+from tensorflow_lite_support.python.task.audio.core import tensor_audio
+from tensorflow_lite_support.python.task.audio.core.pybinds import _pywrap_audio_buffer
+from tensorflow_lite_support.python.task.audio.pybinds import _pywrap_audio_classifier
+from tensorflow_lite_support.python.task.core.proto import base_options_pb2
+from tensorflow_lite_support.python.task.processor.proto import classification_options_pb2
+from tensorflow_lite_support.python.task.processor.proto import classifications_pb2
+
+_CppAudioFormat = _pywrap_audio_buffer.AudioFormat
+_CppAudioBuffer = _pywrap_audio_buffer.AudioBuffer
+_CppAudioClassifier = _pywrap_audio_classifier.AudioClassifier
+_ClassificationOptions = classification_options_pb2.ClassificationOptions
+_BaseOptions = base_options_pb2.BaseOptions
+
+
+@dataclasses.dataclass
+class AudioClassifierOptions:
+  """Options for the audio classifier task."""
+  base_options: _BaseOptions
+  classification_options: _ClassificationOptions = _ClassificationOptions()
+
+
+class AudioClassifier(object):
+  """Class that performs classification on audio."""
+
+  def __init__(self, options: AudioClassifierOptions,
+               classifier: _CppAudioClassifier) -> None:
+    """Initializes the `AudioClassifier` object."""
+    # Creates the object of C++ AudioClassifier class.
+    self._options = options
+    self._classifier = classifier
+
+  @classmethod
+  def create_from_file(cls, file_path: str) -> "AudioClassifier":
+    """Creates the `AudioClassifier` object from a TensorFlow Lite model.
+
+    Args:
+      file_path: Path to the model.
+
+    Returns:
+      `AudioClassifier` object that's created from `options`.
+
+    Raises:
+      RuntimeError if failed to create `AudioClassifier` object from the
+      provided file such as invalid file.
+    """
+    base_options = _BaseOptions(file_name=file_path)
+    options = AudioClassifierOptions(base_options=base_options)
+    return cls.create_from_options(options)
+
+  @classmethod
+  def create_from_options(cls,
+                          options: AudioClassifierOptions) -> "AudioClassifier":
+    """Creates the `AudioClassifier` object from audio classifier options.
+
+    Args:
+      options: Options for the audio classifier task.
+
+    Returns:
+      `AudioClassifier` object that's created from `options`.
+
+    Raises:
+      RuntimeError if failed to create `AudioClassifier` object from
+      `AudioClassifierOptions` such as missing the model.
+    """
+    classifier = _CppAudioClassifier.create_from_options(
+        options.base_options, options.classification_options)
+    return cls(options, classifier)
+
+  def create_input_tensor_audio(self) -> tensor_audio.TensorAudio:
+    """Creates a TensorAudio instance to store the audio input.
+
+    Returns:
+        A TensorAudio instance.
+    """
+    return tensor_audio.TensorAudio(
+        audio_format=self.required_audio_format,
+        buffer_size=self.required_input_buffer_size)
+
+  def create_audio_record(self) -> audio_record.AudioRecord:
+    """Creates an AudioRecord instance to record audio.
+
+    Returns:
+        An AudioRecord instance.
+    """
+    return audio_record.AudioRecord(self.required_audio_format.channels,
+                                    self.required_audio_format.sample_rate,
+                                    self.required_input_buffer_size)
+
+  def classify(
+      self,
+      audio: tensor_audio.TensorAudio,
+  ) -> classifications_pb2.ClassificationResult:
+    """Performs classification on the provided TensorAudio.
+
+    Args:
+      audio: Tensor audio, used to extract the feature vectors.
+
+    Returns:
+      classification result.
+
+    Raises:
+      RuntimeError if failed to get the feature vector.
+    """
+    return self._classifier.classify(
+        _CppAudioBuffer(audio.buffer, audio.buffer_size, audio.format))
+
+  @property
+  def required_input_buffer_size(self) -> int:
+    """Gets the required input buffer size for the model."""
+    return self._classifier.get_required_input_buffer_size()
+
+  @property
+  def required_audio_format(self) -> _CppAudioFormat:
+    """Gets the required audio format for the model."""
+    return self._classifier.get_required_audio_format()
diff --git a/tensorflow_lite_support/python/task/audio/pybinds/BUILD b/tensorflow_lite_support/python/task/audio/pybinds/BUILD
@@ -23,3 +23,19 @@ pybind_extension(
         "@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
     ],
 )
+
+# pybind11 extension module built from _pywrap_audio_classifier.cc. It wraps
+# the C++ audio classifier target listed in deps for use from Python.
+pybind_extension(
+    name = "_pywrap_audio_classifier",
+    srcs = [
+        "_pywrap_audio_classifier.cc",
+    ],
+    module_name = "_pywrap_audio_classifier",
+    deps = [
+        "//tensorflow_lite_support/cc/task/audio:audio_classifier",
+        "//tensorflow_lite_support/cc/task/audio/core:audio_buffer",
+        "//tensorflow_lite_support/cc/task/processor/proto:classification_options_cc_proto",
+        "//tensorflow_lite_support/python/task/core/pybinds:task_utils",
+        "@pybind11",
+        "@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
+    ],
+)
diff --git a/tensorflow_lite_support/python/task/audio/pybinds/_pywrap_audio_classifier.cc b/tensorflow_lite_support/python/task/audio/pybinds/_pywrap_audio_classifier.cc
@@ -0,0 +1,84 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "pybind11/pybind11.h"
+#include "pybind11_protobuf/native_proto_caster.h"  // from @pybind11_protobuf
+#include "tensorflow_lite_support/cc/task/audio/audio_classifier.h"
+#include "tensorflow_lite_support/cc/task/audio/core/audio_buffer.h"
+#include "tensorflow_lite_support/cc/task/processor/proto/classification_options.pb.h"
+#include "tensorflow_lite_support/python/task/core/pybinds/task_utils.h"
+
+namespace tflite {
+namespace task {
+namespace audio {
+
+namespace {
+namespace py = ::pybind11;
+// BaseOptions type received from Python; it is passed through
+// core::convert_to_cpp_base_options before being stored on the C++ options.
+using PythonBaseOptions = ::tflite::python::task::core::BaseOptions;
+// NOTE(review): CppBaseOptions is not referenced anywhere else in this file.
+using CppBaseOptions = ::tflite::task::core::BaseOptions;
+}  // namespace
+
+PYBIND11_MODULE(_pywrap_audio_classifier, m) {
+  // Python wrapper for the C++ AudioClassifier class. This is an internal
+  // binding and shouldn't be used directly by users.
+  // Registers the native protobuf casters so the proto arguments and return
+  // values of the bindings below can be converted by pybind11.
+  pybind11_protobuf::ImportNativeProtoCasters();
+
+  py::class_<AudioClassifier>(m, "AudioClassifier")
+      // Factory: builds C++ AudioClassifierOptions from the Python-side base
+      // options and classification options, then creates the classifier.
+      .def_static(
+          "create_from_options",
+          [](const PythonBaseOptions& base_options,
+             const processor::ClassificationOptions& classification_options) {
+            AudioClassifierOptions options;
+            auto cpp_base_options =
+                core::convert_to_cpp_base_options(base_options);
+            // Ownership of the converted base options moves to `options`.
+            options.set_allocated_base_options(cpp_base_options.release());
+
+            // Scalar fields are forwarded only when explicitly set, so unset
+            // fields are left untouched on `options`.
+            if (classification_options.has_display_names_locale()) {
+              options.set_display_names_locale(
+                  classification_options.display_names_locale());
+            }
+            if (classification_options.has_max_results()) {
+              options.set_max_results(classification_options.max_results());
+            }
+            if (classification_options.has_score_threshold()) {
+              options.set_score_threshold(
+                  classification_options.score_threshold());
+            }
+            // The allowlist/denylist fields are always copied, even if empty.
+            options.mutable_class_name_allowlist()->CopyFrom(
+                classification_options.class_name_allowlist());
+            options.mutable_class_name_denylist()->CopyFrom(
+                classification_options.class_name_denylist());
+
+            auto classifier = AudioClassifier::CreateFromOptions(options);
+            // NOTE(review): core::get_value presumably unwraps the result or
+            // raises a Python error on failure -- confirm in task_utils.h.
+            return core::get_value(classifier);
+          })
+      // Runs classification on an AudioBuffer and returns the result proto.
+      .def("classify",
+           [](AudioClassifier& self,
+              const AudioBuffer& audio_buffer) -> ClassificationResult {
+             auto classification_result = self.Classify(audio_buffer);
+             return core::get_value(classification_result);
+           })
+      // Returns the audio format reported by GetRequiredAudioFormat().
+      .def("get_required_audio_format",
+           [](AudioClassifier& self) -> AudioBuffer::AudioFormat {
+             auto audio_format = self.GetRequiredAudioFormat();
+             return core::get_value(audio_format);
+           })
+      // Bound directly to the member function; no result unwrapping here.
+      .def("get_required_input_buffer_size",
+           &AudioClassifier::GetRequiredInputBufferSize);
+}
+
+}  // namespace audio
+}  // namespace task
+}  // namespace tflite
diff --git a/tensorflow_lite_support/python/test/task/audio/BUILD b/tensorflow_lite_support/python/test/task/audio/BUILD
@@ -22,3 +22,24 @@ py_test(
         "@absl_py//absl/testing:parameterized",
     ],
 )
+
+# Test target for audio_classifier_test.py. The test audio clips and models
+# from the C++ testdata package are made available at runtime via `data`.
+py_test(
+    name = "audio_classifier_test",
+    srcs = ["audio_classifier_test.py"],
+    data = [
+        "//tensorflow_lite_support/cc/test/testdata/task/audio:test_audio_clips",
+        "//tensorflow_lite_support/cc/test/testdata/task/audio:test_models",
+    ],
+    deps = [
+        "//tensorflow_lite_support/python/task/audio:audio_classifier",
+        "//tensorflow_lite_support/python/task/audio/core:tensor_audio",
+        "//tensorflow_lite_support/python/task/core/proto:base_options_py_pb2",
+        "//tensorflow_lite_support/python/task/processor/proto:class_pb2",
+        "//tensorflow_lite_support/python/task/processor/proto:classification_options_pb2",
+        "//tensorflow_lite_support/python/task/processor/proto:classifications_pb2",
+        "//tensorflow_lite_support/python/test:base_test",
+        "//tensorflow_lite_support/python/test:test_util",
+        "@absl_py//absl/testing:parameterized",
+        "@com_google_protobuf//:protobuf_python",
+    ],
+)