Skip to content

Instantly share code, notes, and snippets.

@DrJackilD
Created November 3, 2018 19:55
Show Gist options
  • Save DrJackilD/c26d9371b69af5bc5c3621faf1c65e24 to your computer and use it in GitHub Desktop.
Save DrJackilD/c26d9371b69af5bc5c3621faf1c65e24 to your computer and use it in GitHub Desktop.
# Load Excel files in a folder and load content to simple class
# Using first row as a header
from typing import Tuple, Generator
from types import SimpleNamespace
import os
from openpyxl import load_workbook
INPUT_BASE_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'TheKnotResults_250cities')
class Row(SimpleNamespace):
pass
def read_input_rows() -> Tuple[str, Generator[Row, None, None]]:
def rows_generator(fp: str) -> Generator[Row, None, None]:
wb = load_workbook(fp)
ws = iter(wb.active)
# get headers
headers = ['_'.join(h.value.lower().split()) for h in next(ws)]
for line in ws:
values = {h: v for h, v in zip(headers, [c.value for c in line])}
yield Row(**values, details=None)
for filename in filter(lambda f: f.endswith('xlsx'), os.listdir(INPUT_BASE_FOLDER)):
filepath = os.path.join(INPUT_BASE_FOLDER, filename)
yield filename.split('.')[0], rows_generator(filepath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment