Commit

Added PyCon 2011 presentation
whiteinge committed Mar 18, 2011
1 parent a0a1f2d commit 8409adb
Showing 4 changed files with 587 additions and 0 deletions.
195 changes: 195 additions & 0 deletions upyug_2011-03-17_pycon/counter_backport.py
@@ -0,0 +1,195 @@
"""Python 2.5 backport of collections.counter
.. __: http://code.activestate.com/recipes/576611/
"""

from operator import itemgetter
from heapq import nlargest
from itertools import repeat, ifilter

class Counter(dict):
'''Dict subclass for counting hashable objects. Sometimes called a bag
or multiset. Elements are stored as dictionary keys and their counts
are stored as dictionary values.
>>> Counter('zyzygy')
Counter({'y': 3, 'z': 2, 'g': 1})
'''

def __init__(self, iterable=None, **kwds):
'''Create a new, empty Counter object. And if given, count elements
from an input iterable. Or, initialize the count from another mapping
of elements to their counts.
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
>>> c = Counter(a=4, b=2) # a new counter from keyword args
'''
self.update(iterable, **kwds)

def __missing__(self, key):
return 0

def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abracadabra').most_common(3)
[('a', 5), ('r', 2), ('b', 2)]
'''
if n is None:
return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
return nlargest(n, self.iteritems(), key=itemgetter(1))

def elements(self):
'''Iterator over elements repeating each as many times as its count.
>>> c = Counter('ABCABC')
>>> sorted(c.elements())
['A', 'A', 'B', 'B', 'C', 'C']
If an element's count has been set to zero or is a negative number,
elements() will ignore it.
'''
for elem, count in self.iteritems():
for _ in repeat(None, count):
yield elem

# Override dict methods where the meaning changes for Counter objects.

@classmethod
def fromkeys(cls, iterable, v=None):
raise NotImplementedError(
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')

def update(self, iterable=None, **kwds):
'''Like dict.update() but add counts instead of replacing them.
Source can be an iterable, a dictionary, or another Counter instance.
>>> c = Counter('which')
>>> c.update('witch') # add elements from another iterable
>>> d = Counter('watch')
>>> c.update(d) # add elements from another counter
>>> c['h'] # four 'h' in which, witch, and watch
4
'''
if iterable is not None:
if hasattr(iterable, 'iteritems'):
if self:
self_get = self.get
for elem, count in iterable.iteritems():
self[elem] = self_get(elem, 0) + count
else:
dict.update(self, iterable) # fast path when counter is empty
else:
self_get = self.get
for elem in iterable:
self[elem] = self_get(elem, 0) + 1
if kwds:
self.update(kwds)

def copy(self):
'Like dict.copy() but returns a Counter instance instead of a dict.'
return Counter(self)

def __delitem__(self, elem):
'Like dict.__delitem__() but does not raise KeyError for missing values.'
if elem in self:
dict.__delitem__(self, elem)

def __repr__(self):
if not self:
return '%s()' % self.__class__.__name__
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
return '%s({%s})' % (self.__class__.__name__, items)

# Multiset-style mathematical operations discussed in:
# Knuth TAOCP Volume II section 4.6.3 exercise 19
# and at http://en.wikipedia.org/wiki/Multiset
#
# Outputs guaranteed to only include positive counts.
#
# To strip negative and zero counts, add-in an empty counter:
    #     c += Counter()
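    # For example (an illustration added here, not part of the original recipe):
    #     >>> c = Counter(a=2, b=-1, c=0)
    #     >>> c += Counter()
    #     >>> c
    #     Counter({'a': 2})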

def __add__(self, other):
'''Add counts from two counters.
>>> Counter('abbb') + Counter('bcc')
Counter({'b': 4, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem in set(self) | set(other):
newcount = self[elem] + other[elem]
if newcount > 0:
result[elem] = newcount
return result

def __sub__(self, other):
''' Subtract count, but keep only results with positive counts.
>>> Counter('abbbc') - Counter('bccd')
Counter({'b': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem in set(self) | set(other):
newcount = self[elem] - other[elem]
if newcount > 0:
result[elem] = newcount
return result

def __or__(self, other):
        '''Union is the maximum of the corresponding counts.
>>> Counter('abbb') | Counter('bcc')
Counter({'b': 3, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
_max = max
result = Counter()
for elem in set(self) | set(other):
newcount = _max(self[elem], other[elem])
if newcount > 0:
result[elem] = newcount
return result

def __and__(self, other):
''' Intersection is the minimum of corresponding counts.
>>> Counter('abbb') & Counter('bcc')
Counter({'b': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
_min = min
result = Counter()
if len(self) < len(other):
self, other = other, self
for elem in ifilter(self.__contains__, other):
newcount = _min(self[elem], other[elem])
if newcount > 0:
result[elem] = newcount
return result


if __name__ == '__main__':
import doctest
print doctest.testmod()
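
A sketch of how this backport would typically be consumed (the import fallback below is an illustration, not part of the committed file; it assumes the module is importable as counter_backport):

# Prefer the stdlib Counter on Python 2.7+ and fall back to this backport otherwise.
try:
    from collections import Counter
except ImportError:
    from counter_backport import Counter

tally = Counter('gallahad')
print tally.most_common(1)        # [('a', 3)]
print sorted(tally.elements())    # ['a', 'a', 'a', 'd', 'g', 'h', 'l', 'l']
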
165 changes: 165 additions & 0 deletions upyug_2011-03-17_pycon/lru_cache_backport.py
@@ -0,0 +1,165 @@
"""Python 2.5 backport of collections.lru_cache
.. __: http://code.activestate.com/recipes/498245/
"""
import collections
import functools
from itertools import ifilterfalse
from heapq import nsmallest
from operator import itemgetter

class Counter(dict):
'Mapping where default values are zero'
def __missing__(self, key):
return 0

def lru_cache(maxsize=100):
'''Least-recently-used cache decorator.
Arguments to the cached function must be hashable.
Cache performance statistics stored in f.hits and f.misses.
Clear the cache with f.clear().
http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
'''
maxqueue = maxsize * 10
def decorating_function(user_function,
len=len, iter=iter, tuple=tuple, sorted=sorted, KeyError=KeyError):
cache = {} # mapping of args to results
queue = collections.deque() # order that keys have been used
refcount = Counter() # times each key is in the queue
sentinel = object() # marker for looping around the queue
kwd_mark = object() # separate positional and keyword args

# lookup optimizations (ugly but fast)
queue_append, queue_popleft = queue.append, queue.popleft
queue_appendleft, queue_pop = queue.appendleft, queue.pop

@functools.wraps(user_function)
def wrapper(*args, **kwds):
# cache key records both positional and keyword args
key = args
if kwds:
key += (kwd_mark,) + tuple(sorted(kwds.items()))

# record recent use of this key
queue_append(key)
refcount[key] += 1

# get cache entry or compute if not found
try:
result = cache[key]
wrapper.hits += 1
except KeyError:
result = user_function(*args, **kwds)
cache[key] = result
wrapper.misses += 1

# purge least recently used cache entry
if len(cache) > maxsize:
key = queue_popleft()
refcount[key] -= 1
while refcount[key]:
key = queue_popleft()
refcount[key] -= 1
del cache[key], refcount[key]

# periodically compact the queue by eliminating duplicate keys
# while preserving order of most recent access
if len(queue) > maxqueue:
refcount.clear()
queue_appendleft(sentinel)
for key in ifilterfalse(refcount.__contains__,
iter(queue_pop, sentinel)):
queue_appendleft(key)
refcount[key] = 1


return result

def clear():
cache.clear()
queue.clear()
refcount.clear()
wrapper.hits = wrapper.misses = 0

wrapper.hits = wrapper.misses = 0
wrapper.clear = clear
return wrapper
return decorating_function


def lfu_cache(maxsize=100):
    '''Least-frequently-used cache decorator.
Arguments to the cached function must be hashable.
Cache performance statistics stored in f.hits and f.misses.
Clear the cache with f.clear().
http://en.wikipedia.org/wiki/Least_Frequently_Used
'''
def decorating_function(user_function):
cache = {} # mapping of args to results
use_count = Counter() # times each key has been accessed
kwd_mark = object() # separate positional and keyword args

@functools.wraps(user_function)
def wrapper(*args, **kwds):
key = args
if kwds:
key += (kwd_mark,) + tuple(sorted(kwds.items()))
use_count[key] += 1

# get cache entry or compute if not found
try:
result = cache[key]
wrapper.hits += 1
except KeyError:
result = user_function(*args, **kwds)
cache[key] = result
wrapper.misses += 1

# purge least frequently used cache entry
if len(cache) > maxsize:
for key, _ in nsmallest(maxsize // 10,
use_count.iteritems(),
key=itemgetter(1)):
del cache[key], use_count[key]

return result

def clear():
cache.clear()
use_count.clear()
wrapper.hits = wrapper.misses = 0

wrapper.hits = wrapper.misses = 0
wrapper.clear = clear
return wrapper
return decorating_function


if __name__ == '__main__':

@lru_cache(maxsize=20)
def f(x, y):
return 3*x+y

domain = range(5)
from random import choice
for i in range(1000):
r = f(choice(domain), choice(domain))

print(f.hits, f.misses)

@lfu_cache(maxsize=20)
def f(x, y):
return 3*x+y

domain = range(5)
from random import choice
for i in range(1000):
r = f(choice(domain), choice(domain))

print(f.hits, f.misses)
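
A minimal usage sketch for the two decorators above (it assumes the module is importable as lru_cache_backport; the fib function is illustrative only):

from lru_cache_backport import lru_cache

@lru_cache(maxsize=64)
def fib(n):
    'Naive recursive Fibonacci, made tractable by memoization.'
    return n if n < 2 else fib(n - 1) + fib(n - 2)

fib(30)
print fib.hits, fib.misses   # repeated subproblems are answered from the cache
fib.clear()                  # resets both the cache and the hit/miss counters

lfu_cache is applied the same way; only the eviction policy (least frequently used rather than least recently used) differs.
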
Binary file added upyug_2011-03-17_pycon/presentation.pdf
Binary file not shown.
