[go: nahoru, domu]

Skip to content

Commit

Permalink
Add Bert QA iOS
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 306344585
  • Loading branch information
norangLemon authored and Copybara-Service committed Apr 14, 2020
1 parent 14c8cf1 commit 2839c06
Show file tree
Hide file tree
Showing 67 changed files with 4,252 additions and 0 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"images":[{"size":"60x60","expected-size":"180","filename":"180.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"40x40","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"60x60","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"57x57","expected-size":"57","filename":"57.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"87","filename":"87.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"57x57","expected-size":"114","filename":"114.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"60","filename":"60.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"1024x1024","filename":"1024.png","expected-size":"1024","idiom":"ios-marketing","folder":"Assets.xcassets/AppIcon.appiconset/","scale":"1x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"72x72","expected-size":"72","filename":"72.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"76x76","expected-size":"152","filename":"152.png","folder":"Assets.xcas
sets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"50x50","expected-size":"100","filename":"100.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"76x76","expected-size":"76","filename":"76.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"50x50","expected-size":"50","filename":"50.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"72x72","expected-size":"144","filename":"144.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"40x40","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"83.5x83.5","expected-size":"167","filename":"167.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"20x20","expected-size":"20","filename":"20.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"}]}
6 changes: 6 additions & 0 deletions lite/examples/bert_qa/ios/Assets.xcassets/Contents.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"info" : {
"version" : 1,
"author" : "xcode"
}
}
992 changes: 992 additions & 0 deletions lite/examples/bert_qa/ios/BertQA.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions lite/examples/bert_qa/ios/BertQACore/Constants.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import UIKit

/// Constants describing the configurable interpreter options.
enum InterpreterOptions {
  /// Default, bounds, and identifier of the thread count option.
  ///
  /// The default is 2, clamped to `maximumValue` so that it becomes 1 when only a single
  /// processor is active. `maximumValue` is the number of active processors reported by
  /// `ProcessInfo`.
  static let threadCount: (defaultValue: Int, minimumValue: Int, maximumValue: Int, id: String) = {
    let maximumValue = ProcessInfo.processInfo.activeProcessorCount
    return (
      // Never offer a default above the maximum the option can take.
      defaultValue: min(2, maximumValue),
      minimumValue: 1,
      maximumValue: maximumValue,
      id: "threadCount"
    )
  }()
}

/// Constants for the MobileBERT question-answering example: length limits, tensor dimensions,
/// and the bundled resource files.
enum MobileBERT {
  // Length limits. NOTE(review): presumably measured in tokens — confirm against the callers.
  static let maxAnsLen: Int = 32
  static let maxQueryLen: Int = 64
  static let maxSeqLen: Int = 384

  // NOTE(review): presumably the number of candidate answers to keep — confirm.
  static let predictAnsNum: Int = 5
  /// Outputs need to be shifted by 1 because of the leading `[CLS]` entry.
  static let outputOffset: Int = 1

  static let doLowerCase: Bool = true

  /// Input and output dimensions are both `[1, maxSeqLen]`.
  static let inputDimension: [Int] = [1, MobileBERT.maxSeqLen]
  static let outputDimension: [Int] = [1, MobileBERT.maxSeqLen]

  // Resource files referenced by name and extension.
  static let dataset: File = File(name: "contents_from_squad_dict_format", ext: "json")
  static let vocabulary: File = File(name: "vocab", ext: "txt")
  static let model: File = File(name: "mobilebert_float_20191023", ext: "tflite")
}

/// A resource file identified by its base name and its extension.
struct File {
  /// File name without the extension.
  let name: String
  /// File extension without the leading dot.
  let ext: String

  /// Full file name in the form `name.ext`.
  var description: String {
    return "\(name).\(ext)"
  }

  /// Creates a file reference from a base name and an extension.
  init(name: String, ext: String) {
    self.name = name
    self.ext = ext
  }
}

/// Layout and appearance constants shared by the UI.
enum CustomUI {
  /// Semi-transparent orange used for highlighted text.
  static let textHighlightColor: UIColor = UIColor(red: 1.0, green: 0.7, blue: 0.0, alpha: 0.3)

  static let runButtonOpacity: Double = 0.8

  // Corner radii.
  static let statusTextViewCornerRadius: CGFloat = 7
  static let suggestedQuestionCornerRadius: CGFloat = 10

  // NOTE(review): presumably in seconds — confirm at the animation call site.
  static let keyboardAnimationDuration: Double = 0.23

  // Spacing and padding values.
  static let stackSpacing: CGFloat = 5
  static let padding: CGFloat = 5
  static let contentViewPadding: CGFloat = 7
  static let controlViewPadding: CGFloat = 10
  static let textSidePadding: CGFloat = 4
  static let textPadding: CGFloat = 3

  static let statusFontSize: CGFloat = 14
}

/// User-facing status messages.
enum StatusMessage {
  /// Prompt asking the user to start a run.
  static let askRun: String = "Tap ▶︎ button to get the answer."
  /// Warning for an empty question.
  static let warnEmptyQuery: String = "⚠️Got empty question.\nPlease enter non-empty question."
  /// Error for a failed inference.
  static let inferenceFailError: String = "❗️Failed to inference the answer."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

import Foundation
import TensorFlowLite

// MARK: - Data extension
extension Data {
  /// Builds a `Data` value holding a copy of the raw element storage of `array`.
  ///
  /// - Warning: The element type `T` must be trivial, i.e. copyable bit for bit with no
  ///   indirection or reference-counting operations; otherwise, reinterpreting data from the
  ///   resulting buffer has undefined behavior.
  /// - Parameter array: An array with elements of type `T`.
  init<T>(copyingBufferOf array: [T]) {
    self = array.withUnsafeBufferPointer { elements in
      Data(buffer: elements)
    }
  }

  /// Reinterprets the bytes of this `Data` as an array of `T`.
  ///
  /// The result holds `count / MemoryLayout<T>.stride` elements; trailing bytes that do not
  /// fill a whole element are not copied.
  ///
  /// - Parameter type: The element type of the returned array.
  /// - Returns: An array initialized with zeros and then overwritten with the copied bytes.
  func toArray<T>(type: T.Type) -> [T] where T: AdditiveArithmetic {
    let elementCount = count / MemoryLayout<T>.stride
    var elements = Array(repeating: T.zero, count: elementCount)
    _ = elements.withUnsafeMutableBytes { destination in
      copyBytes(to: destination)
    }
    return elements
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation
import os

/// String helpers used while tokenizing text.
extension String {
  /// Removes invalid characters from the text and normalizes its whitespace.
  ///
  /// The string is first normalized to NFC. Every whitespace scalar (including \t, \n, \r) is
  /// then replaced with a single space, and control scalars as well as scalars flagged for
  /// removal are dropped.
  ///
  /// - Returns: Cleaned text.
  func cleaned() -> String {
    var kept = String.UnicodeScalarView()
    // Normalize string to NFC(Normalization Form Canonical Composition).
    for scalar in precomposedStringWithCanonicalMapping.unicodeScalars {
      if scalar.isWhitespaceForBert {
        kept.append(" ")
      } else if scalar.isControlForBert || scalar.shouldBeRemovedForBert {
        continue
      } else {
        kept.append(scalar)
      }
    }
    return String(kept)
  }

  /// Splits this string on whitespace, dropping empty pieces.
  ///
  /// - Returns: The non-empty runs of scalars between whitespace scalars.
  func splitByWhitespace() -> [String] {
    // Normalize string to NFC(Normalization Form Canonical Composition).
    let normalized = precomposedStringWithCanonicalMapping
    let pieces = normalized.unicodeScalars.split(whereSeparator: { scalar in
      scalar.isWhitespaceForBert
    })
    return pieces.map { piece in String(piece) }
  }

  /// Tokenizes this string into word and punctuation tokens.
  ///
  /// Each punctuation scalar becomes its own token; everything between two punctuation marks
  /// (whitespace included) is kept together as one token.
  ///
  /// For example:
  /// ```
  /// input: "Hi,there."
  /// output: ["Hi", ",", "there", "."]
  /// ```
  /// ```
  /// input: "Hi, there.\n"
  /// output: ["Hi", ",", " there", ".", "\n"]
  /// ```
  func tokenizedWithPunctuation() -> [String] {
    var result: [String] = []
    var pending = ""
    // Normalize string to NFC(Normalization Form Canonical Composition).
    for scalar in precomposedStringWithCanonicalMapping.unicodeScalars {
      guard scalar.isPunctuationForBert else {
        // Not a punctuation mark, so keep growing the pending token.
        pending.unicodeScalars.append(scalar)
        continue
      }
      // Flush whatever was collected before the punctuation mark, then emit the mark itself.
      if !pending.isEmpty {
        result.append(pending)
      }
      result.append(String(scalar))
      pending = ""
    }

    if !pending.isEmpty {
      result.append(pending)
    }
    return result
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// Provides character-classification helpers used for BERT tokenization.
extension UnicodeScalar {
  /// Whether `self` is a whitespace character.
  ///
  /// \t, \n, and \r are technically control characters but we treat them as whitespace since they
  /// are generally considered as such. Any scalar in the space-separator general category is
  /// whitespace as well.
  var isWhitespaceForBert: Bool {
    switch self {
    case " ", "\t", "\n", "\r":
      return true
    default:
      return properties.generalCategory == .spaceSeparator
    }
  }

  /// Whether `self` is a control character (general category control or format).
  var isControlForBert: Bool {
    // These are technically control characters but we count them as whitespace characters.
    if isWhitespaceForBert {
      return false
    }

    switch properties.generalCategory {
    case .control, .format: return true
    default: return false
    }
  }

  /// Whether `self` should be removed for Bert tokenization.
  ///
  /// This is the case for the null scalar (U+0000) and the replacement character (U+FFFD).
  var shouldBeRemovedForBert: Bool {
    return value == 0 || value == 0xfffd
  }

  /// Whether `self` is a punctuation character.
  ///
  /// We treat all non-letter/number printable ASCII (33 through 126) as punctuation.
  /// Characters such as "^", "$", and "`" are not in the Unicode Punctuation class but we treat
  /// them as punctuation anyways, for consistency. DEL (127) is excluded: it is a control
  /// character and is already reported as such by `isControlForBert`.
  var isPunctuationForBert: Bool {
    let isPrintableASCII = isASCII && (33...126).contains(value)
    if isPrintableASCII && !properties.isAlphabetic && properties.numericType == nil {
      return true
    }
    switch properties.generalCategory {
    case .closePunctuation,
      .connectorPunctuation,
      .dashPunctuation,
      .finalPunctuation,
      .initialPunctuation,
      .openPunctuation,
      .otherPunctuation:
      return true
    default:
      return false
    }
  }
}
45 changes: 45 additions & 0 deletions lite/examples/bert_qa/ios/BertQACore/Models/Dataset.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import UIKit

/// Data set to run the TensorFlow Lite model: a title, its content, and a list of questions.
struct Dataset: Decodable {
  let title: String
  let content: String
  let questions: [String]

  /// Decodes a JSON resource from the main bundle into a `Decodable` value.
  ///
  /// Terminates the program with `fatalError` when the resource cannot be found, read, or
  /// decoded as `T`.
  ///
  /// - Parameter file: The JSON resource to decode. Defaults to `MobileBERT.dataset`.
  /// - Returns: The decoded value of type `T`.
  static func load<T: Decodable>(_ file: File = MobileBERT.dataset) -> T {
    guard let resourceURL = Bundle.main.url(forResource: file.name, withExtension: file.ext) else {
      fatalError("Couldn't find \(file.description) in main bundle.")
    }

    let contents: Data
    do {
      contents = try Data(contentsOf: resourceURL)
    } catch {
      fatalError("Couldn't load \(file.description) from main bundle:\n\(error)")
    }

    do {
      return try JSONDecoder().decode(T.self, from: contents)
    } catch {
      fatalError("Couldn't parse \(file.description) as \(T.self):\n\(error)")
    }
  }
}
41 changes: 41 additions & 0 deletions lite/examples/bert_qa/ios/BertQACore/Models/FileLoader.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation
import os

/// Loads resource files from the bundle that contains `FileLoader`.
class FileLoader {
  /// Loads a vocabulary file into a dictionary of vocabulary to its ID.
  ///
  /// The ID of a vocabulary entry is the zero-based index of its line. Line breaks are matched
  /// per character, so a CRLF pair counts as one break and does not shift the IDs. The empty
  /// piece after a trailing line break is not registered as a vocabulary entry. If a line occurs
  /// more than once, the last occurrence wins.
  ///
  /// Terminates the program when the file is not in the bundle. When the file cannot be read,
  /// the error is logged and an empty dictionary is returned.
  ///
  /// - Parameter file: `File` of a vocabulary.
  /// - Returns: Vocabulary IDs from given `file` data.
  static func loadVocabularies(from file: File) -> [String: Int32] {
    guard
      let path = Bundle(for: FileLoader.self).path(forResource: file.name, ofType: file.ext)
    else {
      fatalError("Cannot read the file: \(file.description)")
    }

    var vocabularyIDs = [String: Int32]()
    do {
      let contents = try String(contentsOfFile: path, encoding: .utf8)
      // Keep empty lines in the middle so every entry retains its line index as ID.
      var lines = contents.split(
        omittingEmptySubsequences: false,
        whereSeparator: { $0.isNewline })
      // A file ending with a line break yields one empty trailing piece that is not a line.
      if let lastLine = lines.last, lastLine.isEmpty {
        lines.removeLast()
      }
      vocabularyIDs.reserveCapacity(lines.count)
      for (index, line) in lines.enumerated() {
        vocabularyIDs[String(line)] = Int32(index)
      }
    } catch {
      os_log("%s", type: .error, error.localizedDescription)
    }
    return vocabularyIDs
  }
}
Loading

0 comments on commit 2839c06

Please sign in to comment.