Welcome, guest | Sign In | My Account | Store | Cart

Use ListMixin to create custom list classes from a small subset of list methods.

Python, 255 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
import copy
import sys

# Public domain
class ListMixin(object):
  """
  Defines all list operations from a small subset of methods.

  Subclasses should define _get_element(i), _set_element(i, value),
  __len__(), _resize_region(start, end, new_size) and
  _constructor(iterable).  Define __iter__() for extra speed.

  The _get_element() and _set_element() methods are given indices with
  0 <= i < len(self).

  The _resize_region() method should resize the slice self[start:end]
  so that it has size new_size.  It is given indices such that
  start <= end, 0 <= start <= len(self) and 0 <= end <= len(self).
  The resulting elements in self[start:start+new_size] can be set to
  None or arbitrary Python values.

  The _constructor() method accepts an iterable and should return a
  new instance of the same class as self, populated with the elements
  of the given iterable.
  """
  def __cmp__(self, other):
    return cmp(list(self), list(other))

  def __hash__(self):
    raise TypeError('list objects are unhashable')

  def __iter__(self):
    for i in xrange(len(self)):
      yield self._get_element(i)

  def _tuple_from_slice(self, i):
    """
    Get (start, end, step) tuple from slice object.
    """
    (start, end, step) = i.indices(len(self))
    # Replace (0, -1, 1) with (0, 0, 1) (misfeature in .indices()).
    if step == 1:
      if end < start:
        end = start
      step = None
    if i.step == None:
      step = None
    return (start, end, step)

  def _fix_index(self, i):
    if i < 0:
      i += len(self)
    if i < 0 or i >= len(self):
      raise IndexError('list index out of range')
    return i

  def __getitem__(self, i):
    if isinstance(i, slice):
      (start, end, step) = self._tuple_from_slice(i)
      if step == None:
        indices = xrange(start, end)
      else:
        indices = xrange(start, end, step)
      return self._constructor([self._get_element(i) for i in indices])
    else:
      return self._get_element(self._fix_index(i))

  def __setitem__(self, i, value):
    if isinstance(i, slice):
      (start, end, step) = self._tuple_from_slice(i)
      if step != None:
        # Extended slice
        indices = range(start, end, step)
        if len(value) != len(indices):
          raise ValueError(('attempt to assign sequence of size %d' +
                            ' to extended slice of size %d') %
                           (len(value), len(indices)))
        for (j, assign_val) in enumerate(value):
          self._set_element(indices[j], assign_val)
      else:
        # Normal slice
        if len(value) != (end - start):
          self._resize_region(start, end, len(value))
        for (j, assign_val) in enumerate(value):
          self._set_element(start + j, assign_val)
    else:
      # Single element
      self._set_element(self._fix_index(i), value)

  def __delitem__(self, i):
    if isinstance(i, slice):
      (start, end, step) = self._tuple_from_slice(i)
      if step != None:
        # Extended slice
        indices = range(start, end, step)
        # Sort indices descending
        if len(indices) > 0 and indices[0] < indices[-1]:
          indices.reverse()
        for j in indices:
          del self[j]
      else:
        # Normal slice
        self._resize_region(start, end, 0)
    else:
      # Single element
      i = self._fix_index(i)
      self._resize_region(i, i + 1, 0)

  def __add__(self, other):
    if isinstance(other, self.__class__):
      ans = self._constructor(self)
      ans += other
      return ans
    return list(self) + other

  def __mul__(self, other):
    ans = self._constructor(self)
    ans *= other
    return ans

  def __radd__(self, other):
    if isinstance(other, self.__class__):
      ans = other._constructor(self)
      ans += self
      return ans
    return other + list(self)

  def __rmul__(self, other):
    return self * other

  def __iadd__(self, other):
    self[len(self):len(self)] = other
    return self

  def __imul__(self, other):
    if other <= 0:
      self[:] = []
    elif other > 1:
      aux = list(self)
      for i in xrange(other-1):
        self.extend(aux)
    return self

  def append(self, other):
    self[len(self):len(self)] = [other]

  def extend(self, other):
    self[len(self):len(self)] = other

  def count(self, other):
    ans = 0
    for item in self:
      if item == other:
        ans += 1
    return ans

  def reverse(self):
    for i in xrange(len(self)//2):
      j = len(self) - 1 - i
      (self[i], self[j]) = (self[j], self[i])

  def index(self, x, i=0, j=None):
    if i != 0 or j is not None:
      (i, j, ignore) = self._tuple_from_slice(slice(i, j))
    if j is None:
      j = len(self)
    for k in xrange(i, j):
      if self._get_element(k) == x:
        return k
    raise ValueError('index(x): x not in list')

  def insert(self, i, x):
    self[i:i] = [x]

  def pop(self, i=None):
    if i == None:
      i = len(self)-1
    ans = self[i]
    del self[i]
    return ans

  def remove(self, x):
    for i in xrange(len(self)):
      if self._get_element(i) == x:
        del self[i]
        return
    raise ValueError('remove(x): x not in list')

  # Define sort() as appropriate for the Python version.
  if sys.version_info[:3] < (2, 4, 0):
    def sort(self, cmpfunc=None):
      ans = list(self)
      ans.sort(cmpfunc)
      self[:] = ans
  else:
    def sort(self, cmpfunc=None, key=None, reverse=False):
      ans = list(self)
      if reverse == True:
        ans.sort(cmpfunc, key, reverse)
      elif key != None:
        ans.sort(cmpfunc, key)
      else:
        ans.sort(cmpfunc)
      self[:] = ans

  def __copy__(self):
    return self._constructor(self)

  def __deepcopy__(self, memo={}):
    ans = self._constructor([])
    memo[id(self)] = ans
    ans[:] = copy.deepcopy(tuple(self), memo)
    return ans

  # Tracking idea from R. Hettinger's deque class.  It's not
  # multithread safe, but does work with the builtin Python classes.
  def __str__(self, track=[]):
    if id(self) in track:
      return '...'
    track.append(id(self))
    ans = '%r' % (list(self),)
    track.remove(id(self))
    return ans

  def __repr__(self):
    return self.__class__.__name__ + '(' + str(self) + ')'


# Example usage:

class TestList(ListMixin):
  def __init__(self, L=[]):
    self.L = list(L)

  def _constructor(self, iterable):
    return TestList(iterable)

  def __len__(self):
    return len(self.L)

  def _get_element(self, i):
    assert 0 <= i < len(self)
    return self.L[i]

  def _set_element(self, i, x):
    assert 0 <= i < len(self)
    self.L[i] = x

  def _resize_region(self, start, end, new_size):
    assert 0 <= start <= len(self)
    assert 0 <= end   <= len(self)
    assert start <= end
    self.L[start:end] = [None] * new_size

# Now TestList() has behavior identical to that of list().

Use ListMixin to make classes which implement the Python list interface. Note that in many cases it is easier to subclass the Python builtin list class. If one subclasses list, then the data is stored in-memory with the Python standard storage scheme. The ListMixin class is useful for different storage schemes (e.g. compressed bit arrays or lists stored on disk), more complicated data structures, or other nefarious hackery.

See http://oregonstate.edu/~barnesc/python/listmixin.py for a code listing with exhaustive unit tests. These verify that the list mixin behaves identically to the builtin Python list type.

Recipe 440658 uses this recipe to build a class for a memory-compacted list of bits.

6 comments

George Sakkis 18 years, 6 months ago  # | flag

__getitem__(slice). __getitem__(slice) would be more intuitive if it returns a self.__class__ instance instead of a list. A safe way to do this is:

ans = copy.copy(self)
ans[:] = [self.get_element(i) for i in indices]
return ans

Note that it is not in general safe and correct to call self.__class__(), hence the extra cost of copying the instance.

Connelly Barnes (author) 18 years, 6 months ago  # | flag

Bug. Right, that was a bug. I fixed it, but it required a change to the interface of ListMixin: instead of requiring __copy__, the ListMixin now requires constructor(self, iterable) => new instance initialized from given iterable. (The alternative was to require O(n) time for slicing out an empty slice, which is ridiculous).

Note that __repr__() intentionally uses self.__class__.__name__ in the created representation. Usually __repr__() is a developer feedback mechanism, so I think this is a reasonable compromise between theoretical safety and practical utility (i.e. practicality wins).

George Sakkis 18 years, 6 months ago  # | flag

That's better now. One more thing: I would prefer the required method names to start with underscore, i.e. s/get_element/_get_element, etc., since they are typically not intended to be exposed by the class inheriting the mixin.

Connelly Barnes (author) 18 years, 6 months ago  # | flag

Good idea...I changed the method names as suggested.

Demyn Plantenberg 17 years, 12 months ago  # | flag

test_list results. I ran the unittest test_list against listmixin.TestList. With a few patches, all the tests that should pass do. The patches and test results are below. Even so, if the goal is to match the behavior of built-in lists, there are problems with this implementation. Some of listmixinÂ’s methods assume the list is unaltered while the method is active. For instance:

def __iter__(self):
  for i in xrange(len(self)):
    yield self._get_element(i)

__iter__() does not accommodate changes in the listÂ’s length. If the list shrinks during the iteration, _get_elment(i) will assert; if the list gets longer, the extra elements will not be returned.

PATCHES

* Indices needed some type checking. I put in _fix_index:

  def _fix_index(self, i):
    if type(i) not in [types.IntType, types.LongType, types.BooleanType]:
      raise TypeError('list indices must be integers')



* Handle cases where lists are inserted into itself:

  def __setitem__(self, i, value):
    if value is self:
      value = self._constructor(value)



* Apparently Decimals are okay for pop and insert indices (but not elsewhere
 go figure?)

from decimal import Decimal as DecimalType


  def pop(self, i=None):
    if i == None:
      i = len(self)-1
    elif type(i) == DecimalType:
      i = int(i)


  def insert(self, i, x):
    if type(i) == DecimalType:
      i = int(i)



* Added __contains__():

  def __contains__(self, other):
    for item in self:
      if item == other:
        return True
    return False

UNITTEST TEST_LIST

test_addmul (__main__.ListMixinTest) ... ok
test_append (__main__.ListMixinTest) ... ok
test_constructor_exception_handling (__main__.ListMixinTest) ... ok
test_constructors (__main__.ListMixinTest) ... ok
test_contains (__main__.ListMixinTest) ... ok
test_count (__main__.ListMixinTest) ... ok
test_delitem (__main__.ListMixinTest) ... ok
test_delslice (__main__.ListMixinTest) ... ok
test_extend (__main__.ListMixinTest) ... ok
test_extendedslicing (__main__.ListMixinTest) ... ok
test_getitem (__main__.ListMixinTest) ... ok
test_getitemoverwriteiter (__main__.ListMixinTest) ... ok
test_getslice (__main__.ListMixinTest) ... ERROR
test_iadd (__main__.ListMixinTest) ... ok
test_identity (__main__.ListMixinTest) ... ok
test_imul (__main__.ListMixinTest) ... ok
test_index (__main__.ListMixinTest) ... ok
test_init (__main__.ListMixinTest) ... ok
test_insert (__main__.ListMixinTest) ... ok
test_len (__main__.ListMixinTest) ... ok
test_minmax (__main__.ListMixinTest) ... ok
test_pop (__main__.ListMixinTest) ... ok
test_print (__main__.ListMixinTest) ... FAIL
test_remove (__main__.ListMixinTest) ... ok
test_repeat (__main__.ListMixinTest) ... ok

(comment continued...)

Demyn Plantenberg 17 years, 12 months ago  # | flag

(...continued from previous comment)

test_repr (__main__.ListMixinTest) ... FAIL
test_reverse (__main__.ListMixinTest) ... ok
test_reversed (__main__.ListMixinTest) ... ok
test_set_subscript (__main__.ListMixinTest) ... ok
test_setitem (__main__.ListMixinTest) ... ok
test_setslice (__main__.ListMixinTest) ... ERROR
test_slice (__main__.ListMixinTest) ... ok
test_sort (__main__.ListMixinTest) ... FAIL
test_subscript (__main__.ListMixinTest) ... ok
test_truth (__main__.ListMixinTest) ... ok

test_getslice/setslice/delslice failed because these functions are not implemented (and rightly so.) test_print and test_repr failed because TestList has a different repr from list. The test_sort failure is a bit esoteric: it an exception that doesn't occur with the listmixin implementation.