Preparing release. (#1355)
* Preparing release.

* Fix new clippy
Narsil committed Oct 6, 2023
1 parent aed491d commit 4322056
Showing 7 changed files with 27 additions and 27 deletions.
2 changes: 1 addition & 1 deletion bindings/node/Cargo.toml
@@ -2,7 +2,7 @@
 authors = ["Nicolas Patry <nicolas@huggingface.co>"]
 edition = "2021"
 name = "node"
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
7 changes: 3 additions & 4 deletions bindings/node/index.d.ts
@@ -175,7 +175,7 @@ export class Encoding {
   getSequenceIds(): Array<number | undefined | null>
   tokenToSequence(token: number): number | null
 }
-export class Model { }
+export class Model {}
 export type Bpe = BPE
 export class BPE {
   static empty(): Model
@@ -204,7 +204,7 @@ export class Normalizer {
 export class PreTokenizer {
   preTokenizeString(sequence: string): [string, [number, number]][]
 }
-export class Processor { }
+export class Processor {}
 export class AddedToken {
   constructor(token: string, isSpecial: boolean, options?: AddedTokenOptions | undefined | null)
   getContent(): string
@@ -229,7 +229,6 @@ export class Tokenizer {
   decodeBatch(ids: Array<Array<number>>, skipSpecialTokens: boolean): Promise<string[]>
   static fromString(s: string): Tokenizer
   static fromFile(file: string): Tokenizer
-  // static fromPretrained(file: string, parameters?: JsFromPretrainedParameters | undefined | null): Tokenizer
   addSpecialTokens(tokens: Array<string>): void
   setTruncation(maxLength: number, options?: TruncationOptions | undefined | null): void
   disableTruncation(): void
@@ -251,4 +250,4 @@ export class Tokenizer {
     addSpecialTokens?: boolean | undefined | null,
   ): Encoding
 }
-export class Trainer { }
+export class Trainer {}
4 changes: 2 additions & 2 deletions bindings/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tokenizers-python"
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
 edition = "2021"

@@ -21,7 +21,7 @@ version = "6.4", default-features = false }
 itertools = "0.11"
 
 [dependencies.tokenizers]
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 path = "../../tokenizers"
 
 [dev-dependencies]
2 changes: 1 addition & 1 deletion tokenizers/Cargo.toml
@@ -2,7 +2,7 @@
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
 edition = "2018"
 name = "tokenizers"
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 homepage = "https://github.com/huggingface/tokenizers"
 repository = "https://github.com/huggingface/tokenizers"
 documentation = "https://docs.rs/tokenizers/"
25 changes: 13 additions & 12 deletions tokenizers/src/models/bpe/trainer.rs
@@ -21,17 +21,17 @@ impl PartialEq for Merge {
 }
 impl PartialOrd for Merge {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        if self.count != other.count {
-            Some(self.count.cmp(&other.count))
-        } else {
-            // Here we want ascending order
-            Some(other.pair.cmp(&self.pair))
-        }
+        Some(self.cmp(other))
     }
 }
 impl Ord for Merge {
     fn cmp(&self, other: &Self) -> Ordering {
-        self.partial_cmp(other).unwrap()
+        if self.count != other.count {
+            self.count.cmp(&other.count)
+        } else {
+            // Here we want ascending order
+            other.pair.cmp(&self.pair)
+        }
     }
 }
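Both `Merge` types touched by this commit get the same treatment, presumably to satisfy clippy's newer `non_canonical_partial_ord_impl`-style warning: the ordering logic moves into `Ord::cmp`, and `PartialOrd::partial_cmp` simply delegates to it. A minimal sketch of the resulting pattern, with a simplified stand-in for the `Merge` struct in trainer.rs:

```rust
use std::cmp::Ordering;

// Simplified stand-in for the Merge struct in trainer.rs.
#[derive(PartialEq, Eq)]
struct Merge {
    count: u64,
    pair: (u32, u32),
}

impl PartialOrd for Merge {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate, so the two impls can never disagree.
        Some(self.cmp(other))
    }
}

impl Ord for Merge {
    fn cmp(&self, other: &Self) -> Ordering {
        // Higher count wins; ties fall back to ascending pair order.
        if self.count != other.count {
            self.count.cmp(&other.count)
        } else {
            other.pair.cmp(&self.pair)
        }
    }
}

fn main() {
    let a = Merge { count: 3, pair: (1, 2) };
    let b = Merge { count: 5, pair: (7, 8) };
    assert!(b > a); // the higher count compares as greater
}
```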

@@ -533,15 +533,16 @@ impl BpeTrainer {
             let changes = top
                 .pos
                 .maybe_par_iter()
-                .flat_map(|i| {
-                    let w = &words[*i] as *const _ as *mut _;
+                .flat_map(|&i| {
+                    let word = &words[i] as *const _ as *mut Word;
                     // We can merge each of these words in parallel here because each position
                     // can be there only once (HashSet). So this is safe.
                     unsafe {
-                        let word: &mut Word = &mut (*w);
-                        word.merge(top.pair.0, top.pair.1, new_token_id, max_token_length)
+                        // let word: &mut Word = &mut (*word);
+                        (*word)
+                            .merge(top.pair.0, top.pair.1, new_token_id, max_token_length)
                             .into_iter()
-                            .map(|c| (c, *i))
+                            .map(|c| (c, i))
                             .collect::<Vec<_>>()
                     }
                 })
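This second hunk only renames and reshapes the unsafe block: the cast from a shared reference to a raw `*mut Word` stays, justified by the comment that each position comes from a `HashSet` and is therefore visited at most once. A sequential, hedged sketch of that pattern (the crate runs it in parallel through `maybe_par_iter`; `bump_words` and the `Vec<u32>` stand-in for `Word` are hypothetical):

```rust
use std::collections::HashSet;

// Hypothetical stand-ins: a plain Vec<u32> plays the role of `Word`.
fn bump_words(words: &[Vec<u32>], positions: &HashSet<usize>) -> Vec<(usize, usize)> {
    positions
        .iter()
        .flat_map(|&i| {
            // Same trick as trainer.rs: turn a shared reference into a raw
            // mutable pointer so the closure can mutate "its" word in place.
            let word = &words[i] as *const Vec<u32> as *mut Vec<u32>;
            // SAFETY (mirroring the crate's own argument): `positions` is a
            // set, so each index occurs once and no two iterations touch the
            // same element; that uniqueness is what the parallel version in
            // trainer.rs relies on as well.
            unsafe {
                (*word).push(0);
                vec![((*word).len(), i)]
            }
        })
        .collect()
}

fn main() {
    let words = vec![vec![1], vec![2], vec![3]];
    let positions: HashSet<usize> = [0, 2].into_iter().collect();
    let changes = bump_words(&words, &positions);
    assert_eq!(changes.len(), 2);
}
```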
12 changes: 6 additions & 6 deletions tokenizers/src/models/bpe/word.rs
@@ -20,17 +20,17 @@ impl PartialOrd for Merge {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         // By manually implementing this, we make the containing BinaryHeap a
         // min-heap ordered first on the rank, and the pos otherwise
-        if self.rank != other.rank {
-            Some(other.rank.cmp(&self.rank))
-        } else {
-            Some(other.pos.cmp(&self.pos))
-        }
+        Some(self.cmp(other))
     }
 }
 
 impl Ord for Merge {
     fn cmp(&self, other: &Self) -> Ordering {
-        self.partial_cmp(other).unwrap()
+        if self.rank != other.rank {
+            other.rank.cmp(&self.rank)
+        } else {
+            other.pos.cmp(&self.pos)
+        }
     }
 }
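As the comment in this hunk notes, the reversed comparison is what turns the max-heap `BinaryHeap` into a min-heap on `rank`. A small sketch of that behaviour, again with a simplified stand-in for `Merge`:

```rust
use std::cmp::Ordering;
use std::collections::BinaryHeap;

// Simplified stand-in for the Merge struct in word.rs.
#[derive(PartialEq, Eq)]
struct Merge {
    rank: u32,
    pos: usize,
}

impl PartialOrd for Merge {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Merge {
    fn cmp(&self, other: &Self) -> Ordering {
        if self.rank != other.rank {
            // Reversed on purpose: the lowest rank compares as "greatest",
            // so BinaryHeap (a max-heap) pops it first.
            other.rank.cmp(&self.rank)
        } else {
            other.pos.cmp(&self.pos)
        }
    }
}

fn main() {
    let mut heap = BinaryHeap::new();
    heap.push(Merge { rank: 5, pos: 0 });
    heap.push(Merge { rank: 1, pos: 3 });
    // Behaves as a min-heap: the merge with the lowest rank comes out first.
    assert_eq!(heap.pop().unwrap().rank, 1);
}
```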

2 changes: 1 addition & 1 deletion tokenizers/src/models/unigram/trie.rs
@@ -25,7 +25,7 @@ impl<Label: Eq + Hash + Copy> Trie<Label> {
     pub fn push(&mut self, element: &[Label]) {
         let mut node = &mut self.root;
         for label in element.iter() {
-            node = node.children.entry(*label).or_insert_with(Node::default);
+            node = node.children.entry(*label).or_default();
         }
         node.is_leaf = true;
     }
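This last change is the clippy-suggested shorthand: for value types that implement `Default`, `entry(key).or_default()` behaves like `entry(key).or_insert_with(V::default)`, just shorter. A tiny sketch with a hypothetical trie node:

```rust
use std::collections::HashMap;

// Hypothetical, simplified trie node; the real `Node` lives in trie.rs.
#[derive(Default)]
struct Node {
    children: HashMap<char, Node>,
    is_leaf: bool,
}

fn main() {
    let mut root = Node::default();
    let mut node = &mut root;
    for label in "abc".chars() {
        // Inserts a Node::default() child when `label` is missing, then descends.
        node = node.children.entry(label).or_default();
    }
    node.is_leaf = true;
    assert!(root.children[&'a'].children[&'b'].children[&'c'].is_leaf);
}
```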
