[go: up, home]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] try chashmap in cache implementations #29

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
try chashmap
  • Loading branch information
epwalsh committed Jan 2, 2020
commit 023d51ef47becee6307200ff24388f1e0cfad6a4
1 change: 1 addition & 0 deletions tokenizers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ clap = "2.33.0"
unicode-normalization-alignments = "0.1.12"
unicode_categories = "0.1.1"
indicatif = "0.13.0"
chashmap = "2.2.2"

[dev-dependencies]
criterion = "0.3.0"
Expand Down
29 changes: 10 additions & 19 deletions tokenizers/src/models/bpe/cache.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::collections::HashMap;
use chashmap::CHashMap;
use std::hash::Hash;
use std::sync::RwLock;

/// The default capacity for a new `Cache`.
pub static DEFAULT_CACHE_CAPACITY: usize = 10_000;
Expand All @@ -14,7 +13,7 @@ where
K: Eq + Hash + Clone,
V: Clone,
{
map: RwLock<HashMap<K, V>>,
map: CHashMap<K, V>,
pub capacity: usize,
}

Expand All @@ -35,7 +34,7 @@ where
{
/// Create new `Cache` with the given capacity.
pub fn new(capacity: usize) -> Self {
let map = RwLock::new(HashMap::with_capacity(capacity));
let map = CHashMap::with_capacity(capacity);
Cache { map, capacity }
}

Expand All @@ -46,35 +45,27 @@ where

/// Try clearing the cache.
pub fn try_clear(&self) {
if let Ok(ref mut cache) = self.map.try_write() {
cache.clear();
}
self.map.clear();
}

pub fn get_values<I>(&self, keys_iter: I) -> Option<Vec<Option<V>>>
where
I: Iterator<Item = K>,
{
if let Ok(ref mut cache) = self.map.try_read() {
Some(keys_iter.map(|k| cache.get(&k).cloned()).collect())
} else {
None
}
Some(keys_iter.map(|k| self.map.get(&k).map(|v| v.clone())).collect())
}

pub fn set_values<I, J>(&self, keys_iter: I, values_iter: J)
where
I: Iterator<Item = K>,
J: Iterator<Item = Option<V>>,
{
if let Ok(ref mut cache) = self.map.try_write() {
for (key, value) in keys_iter.zip(values_iter).filter(|(_, v)| v.is_some()) {
// If already at capacity, don't add any more values.
if cache.len() >= self.capacity {
break;
}
cache.insert(key, value.unwrap());
for (key, value) in keys_iter.zip(values_iter).filter(|(_, v)| v.is_some()) {
// If already at capacity, don't add any more values.
if self.map.len() >= self.capacity {
break;
}
self.map.insert(key, value.unwrap());
}
}
}