Reference: python doc
import collections
ChainMap is a faster way to merge some dicts, compared to dict_old.update(dict_new)
dict1 = {"John": True, "Bob": False}
dict2 = {"Alice": True}
cm = collections.ChainMap(dict1, dict2) # this is first than merging two dicts by updates
cm.maps
[{'Bob': False, 'John': True}, {'Alice': True}]
dict2["Lucy"]=False
cm.maps ## the edit on the input can be reflected by chainmap automatically
[{'Bob': False, 'John': True}, {'Alice': True, 'Lucy': False}]
cm["Mike"]=False
cm.maps ## all the add on cm itself is reflected on the first map
[{'Bob': False, 'John': True, 'Mike': False}, {'Alice': True, 'Lucy': False}]
cm['Alice'] ## the search is go through the maps list, slower than one dict!
True
dict2['John']=False
cm['John'] # for the same key, the previous one will override the latter one when query
True
## a deep search version of ChainMap, the ordinary chainmap only update the first dict in the map list
## below is a hack subclass where update can also happen in other dicts
class DeepChainMap(ChainMap):
'Variant of ChainMap that allows direct updates to inner scopes'
def __setitem__(self, key, value):
for mapping in self.maps:
if key in mapping:
mapping[key] = value
return
self.maps[0][key] = value
def __delitem__(self, key):
for mapping in self.maps:
if key in mapping:
del mapping[key]
return
raise KeyError(key)
cm_c = cm.new_child() ## support for nested context
cm_c.maps
[{}, {'Bob': False, 'John': True, 'Mike': False}, {'Alice': True, 'John': False, 'Lucy': False}]
cm_c.parents
ChainMap({'John': True, 'Bob': False, 'Mike': False}, {'Alice': True, 'Lucy': False, 'John': False})
counter is ... just a counter, similar with the implmentation via dict, actually a subclass of dict which can directly count iterable input
ct = collections.Counter()
for w in [1,2,3,1,2,1]:
ct[w] += 1
ct
Counter({1: 3, 2: 2, 3: 1})
## better way to count
collections.Counter([1,2,3,1,2,1])
Counter({1: 3, 2: 2, 3: 1})
wd=collections.Counter("hello world!")
wd
Counter({' ': 1, '!': 1, 'd': 1, 'e': 1, 'h': 1, 'l': 3, 'o': 2, 'r': 1, 'w': 1})
wd["z"] # for missing value, return 0 instead of rasing error
0
list(wd.elements()) ## same things together
['h', 'e', 'l', 'l', 'l', 'o', 'o', ' ', 'w', 'r', 'd', '!']
wd.most_common(2) ## the 2 most common char
[('l', 3), ('o', 2)]
wd2 = {"l":10}
wd.update(wd2)
wd # the value is added instead of updated by dict.update
Counter({' ': 1, '!': 1, 'd': 1, 'e': 1, 'h': 1, 'l': 13, 'o': 2, 'r': 1, 'w': 1})
wd2 = collections.Counter(wd2)
wd2 ## counter can also directly initialized with a dict
Counter({'l': 10})
wd+wd2, wd-wd2, wd&wd2, wd|wd2 ## overload for +, - (keep only positive ones), min, max
(Counter({' ': 1, '!': 1, 'd': 1, 'e': 1, 'h': 1, 'l': 23, 'o': 2, 'r': 1, 'w': 1}), Counter({' ': 1, '!': 1, 'd': 1, 'e': 1, 'h': 1, 'l': 3, 'o': 2, 'r': 1, 'w': 1}), Counter({'l': 10}), Counter({' ': 1, '!': 1, 'd': 1, 'e': 1, 'h': 1, 'l': 13, 'o': 2, 'r': 1, 'w': 1}))
deque is a double-ended queue, it is thread safe, and O(1) for pop and append on both side of the queue
dq = collections.deque([1,2,3,4],maxlen=5) # the maxlen gives the restriction on the queue, new append must
# come with pop from the other end when maxlen is reached
dq.append(5)
list(dq)
[1, 2, 3, 4, 5]
dq.appendleft(0)
list(dq)
[0, 1, 2, 3, 4]
dq.count(1) # count how many 1 in the queue
1
dq.extend([6,7,8])
list(dq)
[3, 4, 6, 7, 8]
dq.popleft()
3
list(dq)
[4, 6, 7, 8]
dq.rotate(2)
list(dq) ## rotate the linked list 2 steps to the right
[7, 8, 4, 6]
of course, defaultdict is a subclass of dict. with a new input for constructor called default_factory. It is designed for a default value of the dict when key is missing. Namely everytime the accessed key is missing, the default_factory() is called.
s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = collections.defaultdict(list)
[d[k].append(v) for k,v in s]
d
defaultdict(list, {'blue': [2, 4], 'red': [1], 'yellow': [1, 3]})
## to understand the above code, just recall that
list() ## the is the function called each time a missing key is accessed
[]
## the usage above is equivalent to
d = {}
for k, v in s:
d.setdefault(k, []).append(v)
d
{'blue': [2, 4], 'red': [1], 'yellow': [1, 3]}
int(),set(),tuple() ## lots of interesting default factory can be used and explored!
(0, set(), ())
assign name to tuple, which is better understood compared to access by index pos
Point = collections.namedtuple('Point','x y') # for the field name ['x', 'y'], 'x y', 'x,y' are all legal formats
p=Point(11,22)
p.x, p.y
(11, 22)
p[0], p[1] ## the old fashioned way in tuple also works
(11, 22)
p = p._replace(x=33) ## tuple finally can be changed
p.x
33
p._fields ## view field names
('x', 'y')
A dict in python is implemented by hash to obtain O(1) speed with the cost that the keys cannot be sorted. An orderdict, well, is a dict remembering order. (order of keys that first inserted)
od = collections.OrderedDict([(1,2),(3,4)])
od[1], od[3]
(2, 4)
od.popitem(last=True) # pop in LIFO fashion if last is True (by default), otherwise pop in FIFO order
(3, 4)
od
OrderedDict([(1, 2)])