- Install `pyarrow`, then print the schemas of both Parquet files to compare them:
import pyarrow.parquet as pq
# Local paths of the two monthly Parquet files whose schemas are compared.
fhv_2019_01 = r"/home/nervuzz/tmp/fhv_tripdata_2019-01.parquet"
fhv_2019_12 = r"/home/nervuzz/tmp/fhv_tripdata_2019-12.parquet"

# Load each file into a pyarrow Table so that its schema can be inspected.
fhv_table_01 = pq.read_table(fhv_2019_01)
fhv_table_12 = pq.read_table(fhv_2019_12)

# Print both schemas so that column-type differences are visible side by side.
print(f"fhv_tripdata_2019-01.parquet schema:\n{fhv_table_01.schema}")
# The header must start with "\n" followed by "fhv...": writing "\fhv..." makes
# Python parse "\f" as a form-feed escape, which drops the leading "f" of the
# file name from the printed header.
print(f"\nfhv_tripdata_2019-12.parquet schema:\n{fhv_table_12.schema}")
Output:
fhv_tripdata_2019-01.parquet schema:
dispatching_base_num: string
pickup_datetime: timestamp[ms]
dropoff_datetime: timestamp[ms]
PULocationID: int64
DOLocationID: int64
SR_Flag: int64
hv_tripdata_2019-12.parquet schema:
dispatching_base_num: string
pickup_datetime: timestamp[ms]
dropoff_datetime: timestamp[ms]
PULocationID: int64
DOLocationID: int64
SR_Flag: null