Created
May 7, 2016 05:19
-
-
Save Klortho/7d83975559bdcc47ac64fd7d877934f6 to your computer and use it in GitHub Desktop.
Merge a list of Python dict's (or any mapping), recursively
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import itertools | |
from itertools import chain | |
import operator | |
import sys | |
import pprint | |
from collections import abc | |
pp = pprint.PrettyPrinter(indent=4) | |
# This class builds upon the Chainmap class described here: | |
# http://code.activestate.com/recipes/305268/. | |
# It provides an elegant way to merge multiple hierarchical dictionaries or | |
# other mapping-type objects in Python. | |
is_mapping = lambda x: isinstance(x, abc.Mapping) | |
not_mapping = lambda x: not(is_mapping(x)) | |
class ChainMapTree(abc.Mapping): | |
"""Combine/overlay multiple hierarchical mappings. This efficiently merges | |
multiple hierarchical (could be several layers deep) dictionaries, producing | |
a new view into them that acts exactly like a merged dictionary, but without | |
doing any copying. | |
Because it doesn't actually copy the data, it is intended to be used only | |
with immutable mappings. It is safe to change *leaf* data values, | |
and the results will be reflected here, but changing the structure of any | |
of the trees will not work. | |
""" | |
def __init__(self, *maps): | |
_maps = list(maps) | |
# All keys of kids that are mappings | |
kid_keys = set([key for m in maps | |
for key in m.keys() if is_mapping(m[key])]) | |
# This will be a dictionary of lists of mappings | |
kid_maps = {}; | |
for key in kid_keys: | |
# The list of child mappings for this key | |
kmaps = [ m[key] for m in maps if key in m ] | |
# Make sure they are *all* mappings | |
if any(map(not_mapping, kmaps)): raise KeyError(key) | |
# Recurse | |
kid_maps[key] = ChainMapTree(*kmaps) | |
# If non-empty, prepend it to the existing list | |
if len(kid_maps.keys()) > 0: _maps.insert(0, kid_maps) | |
self._maps = _maps | |
def __getitem__(self, key): | |
for mapping in self._maps: | |
try: | |
return mapping[key] | |
except KeyError: | |
pass | |
raise KeyError(key) | |
def __iter__(self): | |
return self._maps.__iter__() | |
def __len__(self): | |
return self._maps.__len__() | |
if __name__ == "__main__": | |
d1 = {'a':1, 'b':2, 'c': {'c1': 1, 'c2': 2}} | |
d2 = {'a':3, 'd':4, 'c': {'c2': 4, 'c3': 3}} | |
cm = ChainMapTree(d1, d2) | |
assert cm['a'] == 1 | |
assert cm['b'] == 2 | |
assert cm['d'] == 4 | |
try: | |
print(cm['f']) | |
except KeyError: | |
pass | |
else: | |
raise Exception('Did not raise KeyError for missing key') | |
assert cm['c']['c1'] == 1 | |
assert cm['c']['c2'] == 2 | |
assert cm['c']['c3'] == 3 | |
assert 'a' in cm and 'b' in cm and 'd' in cm | |
assert cm.get('a', 10) == 1 | |
assert cm.get('b', 20) == 2 | |
assert cm.get('d', 30) == 4 | |
assert cm.get('f', 40) == 40 | |
testDicts = [ | |
{ | |
'a': 1001, | |
'b': { | |
'ba': { | |
'baa': 1211 | |
}, | |
'bb': 1022, | |
}, | |
'c': 1003, | |
}, | |
{ | |
'a': 2001, | |
'e': { | |
'ea': 2051 | |
} | |
}, | |
{ | |
'a': 3001, | |
'b': { | |
'ba': { | |
'baa': 3211, | |
'bab': 3212, | |
}, | |
'bb': 3022 | |
}, | |
'd': { | |
'da': 3041 | |
} | |
}, | |
] | |
cmt = ChainMapTree(*testDicts) | |
assert cmt['a'] == 1001 | |
assert cmt['b']['ba']['baa'] == 1211 | |
assert cmt['b']['ba']['bab'] == 3212 | |
assert cmt['b']['bb'] == 1022 | |
assert cmt['c'] == 1003 | |
assert cmt['d']['da'] == 3041 | |
assert cmt['e']['ea'] == 2051 | |
print('ok') |
I found adding this method was useful:
def to_dict(self):
"""Convert to dict
>>> a = {'a': {'x': 1, 'z': 3}, 'foo': "a's foo"}
>>> b = {'a': {'y': 222, 'z': 333}, 'foo': "b's foo"}
>>> cm = ChainMapTree(a, b)
>>> # It acts like a dict when you ask for items, but is not a dict. If you want a dict, do this:
>>> cm.to_dict()
{'a': {'x': 1, 'z': 3, 'y': 222}, 'foo': "a's foo"}
>>> # Compare to normal/flat/not-nested chaining:
>>> dict(a, **b) # Note the precedence is the inverse of ChainMapTree!
{'a': {'y': 222, 'z': 333}, 'foo': "b's foo"}
>>>
>>> # See what you get if you specify b before a
>>> ChainMapTree(b, a).to_dict()
{'a': {'y': 222, 'z': 333, 'x': 1}, 'foo': "b's foo"}
>>> # Compare to normal/flat/not-nested chaining:
>>> dict(b, **a) # Note the precedence is the inverse of ChainMapTree!
{'a': {'x': 1, 'z': 3}, 'foo': "a's foo"}
"""
d = dict()
for k in self:
v = self[k]
if not isinstance(v, ChainMapTree):
d[k] = v
else:
d[k] = v.to_dict()
return d
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In case it's useful to anyone: A doc test with different data.