Welcome, guest | Sign In | My Account | Store | Cart

There are quite a few memoize decorators floating around, but none that met all my requirements. Here's my attempt. It draws from Bengt Richter's O(1) length-limited LRU cache (http://mail.python.org/pipermail/python-list/2002-October/125872.html) and a simplification of Daniel Brodie's Simple Decorators recipe (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/437086).

Python, 129 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import cPickle

__all__ = ['memoize']


# This would usually be defined elsewhere
class decoratorargs(object):
	def __new__(typ, *attr_args, **attr_kwargs):
		def decorator(orig_func):
			self = object.__new__(typ)
			self.__init__(orig_func, *attr_args, **attr_kwargs)
			return self
		
		return decorator


class memoize(decoratorargs):
	class Node:
		__slots__ = ['key', 'value', 'older', 'newer']
		def __init__(self, key, value, older=None, newer=None):
			self.key = key
			self.value = value
			self.older = older
			self.newer = newer
	
	def __init__(self, func, capacity, keyfunc=lambda *args, **kwargs: cPickle.dumps((args, kwargs))):
		self.func = func
		self.capacity = capacity
		self.keyfunc = keyfunc
		self.reset()
	
	def reset(self):
		self.mru = self.Node(None, None)
		self.mru.older = self.mru.newer = self.mru
		self.nodes = {self.mru.key: self.mru}
		self.count = 1
		self.hits = 0
		self.misses = 0
	
	def __call__(self, *args, **kwargs):
		key = self.keyfunc(*args, **kwargs)
		try:
			node = self.nodes[key]
		except KeyError:
			# We have an entry not in the cache
			self.misses += 1
			
			value = self.func(*args, **kwargs)

			lru = self.mru.newer  # Always true
			
			# If we haven't reached capacity
			if self.count < self.capacity:
				# Put it between the MRU and LRU - it'll be the new MRU
				node = self.Node(key, value, self.mru, lru)
				self.mru.newer = node
	
				lru.older = node
				self.mru = node
				self.count += 1
			else:
				# It's FULL! We'll make the LRU be the new MRU, but replace its
				# value first
				del self.nodes[lru.key]  # This mapping is now invalid
				lru.key = key
				lru.value = value
				self.mru = lru

			# Add the new mapping
			self.nodes[key] = self.mru
			return value
		
		# We have an entry in the cache
		self.hits += 1
		
		# If it's already the MRU, do nothing
		if node is self.mru:
			return node.value
		
		lru = self.mru.newer  # Always true
		
		# If it's the LRU, update the MRU to be it
		if node is lru:
			self.mru = lru
			return node.value
		
		# Remove the node from the list
		node.older.newer = node.newer
		node.newer.older = node.older
		
		# Put it between MRU and LRU
		node.older = self.mru
		self.mru.newer = node
		
		node.newer = lru
		lru.older = node
		
		self.mru = node
		return node.value


# Example usage - fib only needs a cache size of 3 to keep it from
# being an exponential-time algorithm
@memoize(3)
def fib(n): return (n > 1) and (fib(n - 1) + fib(n - 2)) or 1

fib(100)  # => 573147844013817084101L

# This is faster because it doesn't use the default key function -
# it doesn't need to call cPickle.dumps((*args, **kwargs))
@memoize(100, lambda n: n)
def fib(n): return (n > 1) and (fib(n - 1) + fib(n - 2)) or 1

fib(100)  # => 573147844013817084101L

# See what's in the cache
# => [(98, 218922995834555169026L), (99, 354224848179261915075L), (100, 573147844013817084101L)]
[(node.key, node.value) for node in fib.nodes.values()]

# Get an example of the key function working
fib.keyfunc(40)  # => 40

# Simple report on performance
# => Hit %: 0.492462
print 'Hit %%: %f' % (float(fib.hits) / (fib.hits + fib.misses))

# Resize the LRU cache
fib.capacity = 100
fib.reset()  # Not necessary unless you shrink it

I assume anyone who wants a memoizer will know why they want it.

Known issues:

1) Changing the capacity downward doesn't actually change the capacity until you reset the cache.

2) If garbage collection is disabled, it leaks because it uses a doubly-linked list. To fix this, the reset() function should null all forward and back references.

1 comment

Bruce Christensen 17 years, 4 months ago  # | flag

Memos are shared between instances. The implementation above shares memos between different instances of the same object. If you want each instance to use a different memo, use something like this:

class memoized_method(memoized):
    def __init__(self, func, capacity, keyfunc=lambda *args, **kwargs: '%s: %s'
    % (id(args[0]), cPickle.dumps((args, kwargs)))):
        memoized.__init__(self, func, capacity, keyfunc)