Skip to content

Instantly share code, notes, and snippets.

@jymchng
Last active June 15, 2022 16:11
Show Gist options
  • Save jymchng/ce3a89fced820470cf87cf55e4b77a4c to your computer and use it in GitHub Desktop.
Save jymchng/ce3a89fced820470cf87cf55e4b77a4c to your computer and use it in GitHub Desktop.
Given a Excel (.xlsx) file with merged cells, returns a pandas DataFrame with its unmerged cells' values appropriately filled.
from openpyxl.workbook import Workbook
from openpyxl import load_workbook
from openpyxl.utils.cell import range_boundaries
import pandas as pd
import numpy as np
def _parse_xlxs_merged_cells(filepath, how='top-left'):
"""
Takes in a Path-like object specifying the .xlsx file and returns a pandas DataFrame with unmerged cells' values
appropriately filled.
-params-
filepath: Path-like
how: a string, specifying which cell's value to be copied to all unmerged cells, default is 'top-left'
-returns-
pandas DataFrame with with unmerged cells' filled with specified values
"""
df = pd.read_excel(filepath)
how_mapping = {'top-left': (1, 0)}
wb = load_workbook(filename=filepath)
for st_name in wb.sheetnames:
st = wb[st_name]
mcr_coord_list = [mcr.coord for mcr in st.merged_cells.ranges]
range_bound_arr = np.array([range_boundaries(mcr) for mcr in mcr_coord_list])
range_bound_arr_pyindexes = range_bound_arr - [1, 2, 1, 2]
for range_bound_arr_pyindex in range_bound_arr_pyindexes:
# min_col, min_row, max_col, max_row
# 0, , 1 , 2 , 3
df.iloc[range_bound_arr_pyindex[1]:range_bound_arr_pyindex[-1]+1, range_bound_arr_pyindex[0]:range_bound_arr_pyindex[-2]+1] = \
df.iloc[range_bound_arr_pyindex[how_mapping[how][0]],range_bound_arr_pyindex[how_mapping[how][1]]]
return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment