import pandas as pd
# Small example frame with three integer columns: x, y and n.
df = pd.DataFrame(
    {
        'x': [1, 1, 1, 2, 2, 3],
        'y': [1, 2, 3, 1, 2, 1],
        'n': [3, 2, 1, 1, 2, 1],
    }
)
# Bare expression so a notebook cell renders the frame.
df
|   | x | y | n |
|---|---|---|---|
| 0 | 1 | 1 | 3 |
| 1 | 1 | 2 | 2 |
| 2 | 1 | 3 | 1 |
| 3 | 2 | 1 | 1 |
| 4 | 2 | 2 | 2 |
| 5 | 3 | 1 | 1 |
# GroupBy.pipe passes the whole groupby object to a function and returns
# whatever that function returns. That makes it a handy way to chain
# functions and to combine several per-group results into one arbitrary
# object (here a DataFrame) in otherwise awkward pandas operations.
def _second_or_nan(values):
    """Return the second element of ``values``, or NaN if it has fewer than two."""
    return values.iloc[1] if len(values) > 1 else float('nan')


# 'frequency' holds the per-group sum of y; 'second' holds the second y value
# of each group (NaN when the group has a single row).
# Since pandas 2.0, ``nth`` keeps the original row labels rather than the
# group keys, so its result no longer aligns with ``sum()``. ``apply`` keeps
# the result indexed by x on every pandas version.
df.groupby('x').y.pipe(
    lambda grouped: pd.DataFrame({
        'frequency': grouped.sum(),
        'second': grouped.apply(_second_or_nan),
    })
)
| x | frequency | second |
|---|---|---|
| 1 | 6 | 2.0 |
| 2 | 3 | 2.0 |
| 3 | 1 | NaN |