Skip to content

Instantly share code, notes, and snippets.

@Martin91
Last active September 28, 2018 03:19
Show Gist options
  • Save Martin91/725b6231cdde0e7c5d8f7726e1132996 to your computer and use it in GitHub Desktop.
Save Martin91/725b6231cdde0e7c5d8f7726e1132996 to your computer and use it in GitHub Desktop.
Python itertools.groupby trap
# Demonstration of the itertools.groupby "consecutive keys only" trap.
from itertools import groupby

data = [
    {'id': 1, 'value': 1},
    {'id': 2, 'value': 2},
    {'id': 2, 'value': 3},
    {'id': 1, 'value': 4},
]

# groupby() starts a new group every time the key changes, so on unsorted
# input the key 1 shows up twice, as two separate groups.
for group_id, members in groupby(data, key=lambda e: e['id']):
    print("id: %d" % group_id)
    print("values: %s" % list(members))
# => id: 1
# => values: [{'id': 1, 'value': 1}]
# => id: 2
# => values: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]
# => id: 1
# => values: [{'id': 1, 'value': 4}]

# Dangerous! Building a dict straight from the unsorted groups lets the
# second group for key 1 overwrite the first one.
naive_groups = {key: list(value) for key, value in groupby(data, key=lambda x: x['id'])}
# => {1: [{'id': 1, 'value': 4}], 2: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]} # missing the first element!

# Sorting by the same key first makes equal keys adjacent, so every key
# produces exactly one group.
sorted_groups = {
    key: list(value)
    for key, value in groupby(sorted(data, key=lambda x: x['id']), key=lambda x: x['id'])
}
# => {1: [{'id': 1, 'value': 1}, {'id': 1, 'value': 4}],
# 2: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]}
# According to the official documentation, https://docs.python.org/2/library/itertools.html#itertools.groupby
#
# It generates a break or new group every time the value of the key function changes (which is why it is
# usually necessary to have sorted the data using the same key function)
#
# (╯‵□′)╯︵┻━┻
def sorted_groupby(iterable, key=None):
    """
    Sort `iterable` by `key`, then group it with `itertools.groupby`.

    # https://docs.python.org/2/library/itertools.html#itertools.groupby
    The built-in `itertools.groupby` starts a new group every time the value
    of the key function changes, so callers normally have to sort the data
    with the same key function first. This wrapper does that sorting, so
    every distinct key yields exactly one group.

    :param iterable: the items to group; it is fully consumed by the sort.
    :param key: one-argument function computing the grouping key for an
        item, or None to group by the items themselves. The keys must be
        sortable, because the same function is used for the sort.
    :return: an `itertools.groupby` iterator of `(key, group)` pairs, in
        ascending key order. Each `group` is itself a lazy iterator, so
        consume it (e.g. with `list`) before advancing to the next pair.
    """
    # Imported locally so the function does not rely on a module-level
    # `import itertools` being present.
    import itertools

    ordered = sorted(iterable, key=key)
    return itertools.groupby(ordered, key=key)
def groupby_to_dict(iterable, key=None):
    """
    Group `iterable` by `key` (sorting it first) and collect the groups
    into a dict that maps each key to the list of its items.
    """
    grouped = {}
    for group_key, members in sorted_groupby(iterable=iterable, key=key):
        # Each group is a lazy iterator, so materialise it before moving on.
        grouped[group_key] = list(members)
    return grouped
# Usage examples: after sorting, every key appears exactly once.
for k, v in sorted_groupby(data, key=lambda x: x['id']):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("key: %d" % k)
    print("value: %s" % list(v))
# => key: 1
# => value: [{'id': 1, 'value': 1}, {'id': 1, 'value': 4}]
# => key: 2
# => value: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]
groupby_to_dict(data, key=lambda x: x['id'])
# => {1: [{'id': 1, 'value': 1}, {'id': 1, 'value': 4}],
# => 2: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]}
# Without a key function the items themselves are the grouping keys.
data = [1, 2, 3, 2, 3, 4, 1, 1, 2]
groupby_to_dict(data)
# => {1: [1, 1, 1], 2: [2, 2, 2], 3: [3, 3], 4: [4]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment