123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488 |
- """
- An OrderedSet is a custom MutableSet that remembers its order, so that every
- entry has an index that can be looked up.
- Based on a recipe originally posted to ActiveState Recipes by Raymond Hettiger,
- and released under the MIT license.
- """
- import itertools as it
- from collections import deque
- try:
- # Python 3
- from collections.abc import MutableSet, Sequence
- except ImportError:
- # Python 2.7
- from collections import MutableSet, Sequence
- SLICE_ALL = slice(None)
- __version__ = "3.1"
- def is_iterable(obj):
- """
- Are we being asked to look up a list of things, instead of a single thing?
- We check for the `__iter__` attribute so that this can cover types that
- don't have to be known by this module, such as NumPy arrays.
- Strings, however, should be considered as atomic values to look up, not
- iterables. The same goes for tuples, since they are immutable and therefore
- valid entries.
- We don't need to check for the Python 2 `unicode` type, because it doesn't
- have an `__iter__` attribute anyway.
- """
- return (
- hasattr(obj, "__iter__")
- and not isinstance(obj, str)
- and not isinstance(obj, tuple)
- )
- class OrderedSet(MutableSet, Sequence):
- """
- An OrderedSet is a custom MutableSet that remembers its order, so that
- every entry has an index that can be looked up.
- Example:
- >>> OrderedSet([1, 1, 2, 3, 2])
- OrderedSet([1, 2, 3])
- """
- def __init__(self, iterable=None):
- self.items = []
- self.map = {}
- if iterable is not None:
- self |= iterable
- def __len__(self):
- """
- Returns the number of unique elements in the ordered set
- Example:
- >>> len(OrderedSet([]))
- 0
- >>> len(OrderedSet([1, 2]))
- 2
- """
- return len(self.items)
- def __getitem__(self, index):
- """
- Get the item at a given index.
- If `index` is a slice, you will get back that slice of items, as a
- new OrderedSet.
- If `index` is a list or a similar iterable, you'll get a list of
- items corresponding to those indices. This is similar to NumPy's
- "fancy indexing". The result is not an OrderedSet because you may ask
- for duplicate indices, and the number of elements returned should be
- the number of elements asked for.
- Example:
- >>> oset = OrderedSet([1, 2, 3])
- >>> oset[1]
- 2
- """
- if isinstance(index, slice) and index == SLICE_ALL:
- return self.copy()
- elif is_iterable(index):
- return [self.items[i] for i in index]
- elif hasattr(index, "__index__") or isinstance(index, slice):
- result = self.items[index]
- if isinstance(result, list):
- return self.__class__(result)
- else:
- return result
- else:
- raise TypeError("Don't know how to index an OrderedSet by %r" % index)
- def copy(self):
- """
- Return a shallow copy of this object.
- Example:
- >>> this = OrderedSet([1, 2, 3])
- >>> other = this.copy()
- >>> this == other
- True
- >>> this is other
- False
- """
- return self.__class__(self)
- def __getstate__(self):
- if len(self) == 0:
- # The state can't be an empty list.
- # We need to return a truthy value, or else __setstate__ won't be run.
- #
- # This could have been done more gracefully by always putting the state
- # in a tuple, but this way is backwards- and forwards- compatible with
- # previous versions of OrderedSet.
- return (None,)
- else:
- return list(self)
- def __setstate__(self, state):
- if state == (None,):
- self.__init__([])
- else:
- self.__init__(state)
- def __contains__(self, key):
- """
- Test if the item is in this ordered set
- Example:
- >>> 1 in OrderedSet([1, 3, 2])
- True
- >>> 5 in OrderedSet([1, 3, 2])
- False
- """
- return key in self.map
- def add(self, key):
- """
- Add `key` as an item to this OrderedSet, then return its index.
- If `key` is already in the OrderedSet, return the index it already
- had.
- Example:
- >>> oset = OrderedSet()
- >>> oset.append(3)
- 0
- >>> print(oset)
- OrderedSet([3])
- """
- if key not in self.map:
- self.map[key] = len(self.items)
- self.items.append(key)
- return self.map[key]
- append = add
- def update(self, sequence):
- """
- Update the set with the given iterable sequence, then return the index
- of the last element inserted.
- Example:
- >>> oset = OrderedSet([1, 2, 3])
- >>> oset.update([3, 1, 5, 1, 4])
- 4
- >>> print(oset)
- OrderedSet([1, 2, 3, 5, 4])
- """
- item_index = None
- try:
- for item in sequence:
- item_index = self.add(item)
- except TypeError:
- raise ValueError(
- "Argument needs to be an iterable, got %s" % type(sequence)
- )
- return item_index
- def index(self, key):
- """
- Get the index of a given entry, raising an IndexError if it's not
- present.
- `key` can be an iterable of entries that is not a string, in which case
- this returns a list of indices.
- Example:
- >>> oset = OrderedSet([1, 2, 3])
- >>> oset.index(2)
- 1
- """
- if is_iterable(key):
- return [self.index(subkey) for subkey in key]
- return self.map[key]
- # Provide some compatibility with pd.Index
- get_loc = index
- get_indexer = index
- def pop(self):
- """
- Remove and return the last element from the set.
- Raises KeyError if the set is empty.
- Example:
- >>> oset = OrderedSet([1, 2, 3])
- >>> oset.pop()
- 3
- """
- if not self.items:
- raise KeyError("Set is empty")
- elem = self.items[-1]
- del self.items[-1]
- del self.map[elem]
- return elem
- def discard(self, key):
- """
- Remove an element. Do not raise an exception if absent.
- The MutableSet mixin uses this to implement the .remove() method, which
- *does* raise an error when asked to remove a non-existent item.
- Example:
- >>> oset = OrderedSet([1, 2, 3])
- >>> oset.discard(2)
- >>> print(oset)
- OrderedSet([1, 3])
- >>> oset.discard(2)
- >>> print(oset)
- OrderedSet([1, 3])
- """
- if key in self:
- i = self.map[key]
- del self.items[i]
- del self.map[key]
- for k, v in self.map.items():
- if v >= i:
- self.map[k] = v - 1
- def clear(self):
- """
- Remove all items from this OrderedSet.
- """
- del self.items[:]
- self.map.clear()
- def __iter__(self):
- """
- Example:
- >>> list(iter(OrderedSet([1, 2, 3])))
- [1, 2, 3]
- """
- return iter(self.items)
- def __reversed__(self):
- """
- Example:
- >>> list(reversed(OrderedSet([1, 2, 3])))
- [3, 2, 1]
- """
- return reversed(self.items)
- def __repr__(self):
- if not self:
- return "%s()" % (self.__class__.__name__,)
- return "%s(%r)" % (self.__class__.__name__, list(self))
- def __eq__(self, other):
- """
- Returns true if the containers have the same items. If `other` is a
- Sequence, then order is checked, otherwise it is ignored.
- Example:
- >>> oset = OrderedSet([1, 3, 2])
- >>> oset == [1, 3, 2]
- True
- >>> oset == [1, 2, 3]
- False
- >>> oset == [2, 3]
- False
- >>> oset == OrderedSet([3, 2, 1])
- False
- """
- # In Python 2 deque is not a Sequence, so treat it as one for
- # consistent behavior with Python 3.
- if isinstance(other, (Sequence, deque)):
- # Check that this OrderedSet contains the same elements, in the
- # same order, as the other object.
- return list(self) == list(other)
- try:
- other_as_set = set(other)
- except TypeError:
- # If `other` can't be converted into a set, it's not equal.
- return False
- else:
- return set(self) == other_as_set
- def union(self, *sets):
- """
- Combines all unique items.
- Each items order is defined by its first appearance.
- Example:
- >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0])
- >>> print(oset)
- OrderedSet([3, 1, 4, 5, 2, 0])
- >>> oset.union([8, 9])
- OrderedSet([3, 1, 4, 5, 2, 0, 8, 9])
- >>> oset | {10}
- OrderedSet([3, 1, 4, 5, 2, 0, 10])
- """
- cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet
- containers = map(list, it.chain([self], sets))
- items = it.chain.from_iterable(containers)
- return cls(items)
- def __and__(self, other):
- # the parent implementation of this is backwards
- return self.intersection(other)
- def intersection(self, *sets):
- """
- Returns elements in common between all sets. Order is defined only
- by the first set.
- Example:
- >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3])
- >>> print(oset)
- OrderedSet([1, 2, 3])
- >>> oset.intersection([2, 4, 5], [1, 2, 3, 4])
- OrderedSet([2])
- >>> oset.intersection()
- OrderedSet([1, 2, 3])
- """
- cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet
- if sets:
- common = set.intersection(*map(set, sets))
- items = (item for item in self if item in common)
- else:
- items = self
- return cls(items)
- def difference(self, *sets):
- """
- Returns all elements that are in this set but not the others.
- Example:
- >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]))
- OrderedSet([1, 3])
- >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3]))
- OrderedSet([1])
- >>> OrderedSet([1, 2, 3]) - OrderedSet([2])
- OrderedSet([1, 3])
- >>> OrderedSet([1, 2, 3]).difference()
- OrderedSet([1, 2, 3])
- """
- cls = self.__class__
- if sets:
- other = set.union(*map(set, sets))
- items = (item for item in self if item not in other)
- else:
- items = self
- return cls(items)
- def issubset(self, other):
- """
- Report whether another set contains this set.
- Example:
- >>> OrderedSet([1, 2, 3]).issubset({1, 2})
- False
- >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4})
- True
- >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5})
- False
- """
- if len(self) > len(other): # Fast check for obvious cases
- return False
- return all(item in other for item in self)
- def issuperset(self, other):
- """
- Report whether this set contains another set.
- Example:
- >>> OrderedSet([1, 2]).issuperset([1, 2, 3])
- False
- >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3})
- True
- >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3})
- False
- """
- if len(self) < len(other): # Fast check for obvious cases
- return False
- return all(item in self for item in other)
- def symmetric_difference(self, other):
- """
- Return the symmetric difference of two OrderedSets as a new set.
- That is, the new set will contain all elements that are in exactly
- one of the sets.
- Their order will be preserved, with elements from `self` preceding
- elements from `other`.
- Example:
- >>> this = OrderedSet([1, 4, 3, 5, 7])
- >>> other = OrderedSet([9, 7, 1, 3, 2])
- >>> this.symmetric_difference(other)
- OrderedSet([4, 5, 9, 2])
- """
- cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet
- diff1 = cls(self).difference(other)
- diff2 = cls(other).difference(self)
- return diff1.union(diff2)
- def _update_items(self, items):
- """
- Replace the 'items' list of this OrderedSet with a new one, updating
- self.map accordingly.
- """
- self.items = items
- self.map = {item: idx for (idx, item) in enumerate(items)}
- def difference_update(self, *sets):
- """
- Update this OrderedSet to remove items from one or more other sets.
- Example:
- >>> this = OrderedSet([1, 2, 3])
- >>> this.difference_update(OrderedSet([2, 4]))
- >>> print(this)
- OrderedSet([1, 3])
- >>> this = OrderedSet([1, 2, 3, 4, 5])
- >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6]))
- >>> print(this)
- OrderedSet([3, 5])
- """
- items_to_remove = set()
- for other in sets:
- items_to_remove |= set(other)
- self._update_items([item for item in self.items if item not in items_to_remove])
- def intersection_update(self, other):
- """
- Update this OrderedSet to keep only items in another set, preserving
- their order in this set.
- Example:
- >>> this = OrderedSet([1, 4, 3, 5, 7])
- >>> other = OrderedSet([9, 7, 1, 3, 2])
- >>> this.intersection_update(other)
- >>> print(this)
- OrderedSet([1, 3, 7])
- """
- other = set(other)
- self._update_items([item for item in self.items if item in other])
- def symmetric_difference_update(self, other):
- """
- Update this OrderedSet to remove items from another set, then
- add items from the other set that were not present in this set.
- Example:
- >>> this = OrderedSet([1, 4, 3, 5, 7])
- >>> other = OrderedSet([9, 7, 1, 3, 2])
- >>> this.symmetric_difference_update(other)
- >>> print(this)
- OrderedSet([4, 5, 9, 2])
- """
- items_to_add = [item for item in other if item not in self]
- items_to_remove = set(other)
- self._update_items(
- [item for item in self.items if item not in items_to_remove] + items_to_add
- )
|