-
Install flatbuffers using your favorite package manager, or from source: https://google.github.io/flatbuffers/
-
Download the flatbuffer definition for the feather metadata: https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs
-
Generate Python bindings for the feather metadata:
flatc -p metadata.fbs
In R (or Python), write a small dataset such as iris to a feather file:
library(feather)
data(iris)
write_feather(iris, 'iris.feather')
import struct
import numpy as np
# this module was generated by the `flatc -p metadata.fbs` step above
from feather.fbs.CTable import CTable
# Read the whole feather file into memory. The context manager closes the
# file handle as soon as the bytes are loaded.
with open('iris.feather', 'rb') as fh:
    f = fh.read()

# File layout (see the feather format description):
# [data, data, data, ..., data| <uint32> metadata size | <4-byte "FEA1">]
# https://github.com/wesm/feather/blob/master/doc/FORMAT.md
#
# Check the trailing magic first so that a non-feather or truncated file
# fails with a clear message instead of a garbage metadata size.
if len(f) < 8 or f[-4:] != b'FEA1':
    raise ValueError("not a feather file: missing trailing 'FEA1' magic")

# Get the size in bytes of the metadata section. '<I' pins the read to a
# 4-byte little-endian unsigned int; plain 'I' would use the host's native
# byte order and misread the size on a big-endian machine.
meta_size = struct.unpack('<I', f[-8:-4])[0]
if meta_size > len(f) - 8:
    raise ValueError("corrupt feather file: metadata size exceeds file size")

# The metadata flatbuffer sits immediately before the 8-byte footer.
meta_buf = bytearray(f[-8 - meta_size:-8])
meta = CTable.GetRootAsCTable(meta_buf, 0)

# get the number of columns
print(meta.ColumnsLength())

# decode the 1st column
c = meta.Columns(0)
print(c.Name())
col_loc = c.Values()
print(col_loc.Type())  # 10 - meaning DOUBLE

# Unpack into a numpy array. Slicing a memoryview avoids copying the column
# bytes; the resulting array is a read-only view onto `f`.
# NOTE(review): this assumes the column has no null bitmap in front of the
# values (true for iris) - confirm before reusing on columns with nulls.
col_start = col_loc.Offset()
col_stop = col_start + col_loc.TotalBytes()
sepal_length = np.frombuffer(memoryview(f)[col_start:col_stop], dtype='<f8')
print(sepal_length)