If you need to read many files into a single DataFrame, use this snippet:
from multiprocessing import Pool
from tqdm import tqdm
import pandas as pd
def file_parser_func(fn: str) -> list:
    """Read one CSV file and return its rows as a list of dicts.

    Parameters
    ----------
    fn : str
        Path to the CSV file to read.

    Returns
    -------
    list
        One dict per row, keyed by column name (pandas
        ``to_dict('records')`` orientation) — a picklable shape that can
        cross process boundaries, unlike a raw DataFrame in some setups.
    """
    return pd.read_csv(fn).to_dict('records')
files = ['a.csv', 'b.csv']

# The __main__ guard is required for multiprocessing: under the 'spawn'
# start method (default on Windows and macOS) each worker re-imports this
# module, and an unguarded Pool() would recursively spawn workers.
if __name__ == '__main__':
    data = []
    with Pool(processes=8) as pool:
        # imap_unordered yields each file's records as soon as a worker
        # finishes, letting tqdm show live progress; total= is needed
        # because the iterator itself has no len().
        for records in tqdm(pool.imap_unordered(file_parser_func, files),
                            total=len(files)):
            data.extend(records)
    # Build the combined DataFrame once, from the flat list of row dicts.
    df = pd.DataFrame(data)
If you have a large DataFrame and need to speed up an `apply` call, try the `pandarallel` library:
from pandarallel import pandarallel
# Number of worker processes; tune to the machine's core count.
n_jobs = 8
# Must be called once, before any parallel_* method is used on a DataFrame.
pandarallel.initialize(nb_workers=n_jobs, progress_bar=True)
# Drop-in parallel replacement for df.apply(some_func, axis=1);
# assumes `df` and `some_func` are defined earlier (e.g. the snippet above).
df.parallel_apply(some_func, axis=1)