Skip to content

Instantly share code, notes, and snippets.

@dholth
Created July 29, 2024 21:11
Show Gist options
  • Save dholth/bbd033c1aade0a4c4d67834d2c58781a to your computer and use it in GitHub Desktop.
Save dholth/bbd033c1aade0a4c4d67834d2c58781a to your computer and use it in GitHub Desktop.
Build .conda packages with conda-package-streaming
@contextmanager
def builder(
    destination, file_id, is_info=lambda filename: filename.startswith("info/")
):
    """
    Yield TarFile object for adding files, then transmute to "{destination}/{file_id}.conda"

    Members added to the yielded TarFile are streamed through an OS pipe to a
    background thread, which hands them to ``transmute_stream``.

    Args:
        destination: forwarded unchanged to ``transmute_stream``.
        file_id: forwarded unchanged to ``transmute_stream``.
        is_info: predicate taking a member filename; forwarded to
            ``transmute_stream``. The default matches names starting with
            ``"info/"``.

    Yields:
        A ``tarfile.TarFile`` opened in streaming write mode (``"w|"``).

    Raises:
        Any exception raised in the background thread is re-raised in the
        caller once the ``with`` block finishes without an error of its own.
        If the producer is interrupted by ``BrokenPipeError`` because the
        background thread failed, the thread's exception is raised instead,
        chained to the ``BrokenPipeError``.
    """
    # Stream through a pipe instead of collecting all data in a temporary
    # tarfile. Underlying transmute_stream collects data into separate pkg, info
    # tar to be able to send complete size to zstd, so this strategy avoids one
    # temporary file but not all of them. Compare to conda-package-handling 2.3
    # which uses less temporary space but reads every input file twice; once to
    # count the size and a second time to stream into a zstd compressor.
    r, w = os.pipe()
    reader = open(r, mode="rb")
    writer = open(w, mode="wb")
    drain_size = 1 << 16
    # Exceptions do not cross thread boundaries on their own; collect them here
    # so the caller can see them.
    errors = []

    def transmute_thread():
        try:
            with tarfile.open(fileobj=reader, mode="r|") as tar:
                transmute_stream(
                    file_id,
                    destination,
                    package_stream=((tar, entry) for entry in tar),
                    is_info=is_info,
                )
            # The tar reader stops at the end-of-archive marker and may leave
            # trailing padding unread. Consume it until EOF so the producer's
            # final flush never hits a closed pipe on the success path.
            while reader.read(drain_size):
                pass
        except BaseException as exc:
            errors.append(exc)
        finally:
            # On failure this makes the producer fail fast with BrokenPipeError
            # instead of blocking forever on a full pipe.
            reader.close()

    t = threading.Thread(target=transmute_thread)
    t.start()

    try:
        try:
            with tarfile.open(fileobj=writer, mode="w|") as tar:
                yield tar
        finally:
            # TarFile does not close a file object it was handed; close the
            # write end ourselves so the reader sees EOF, then always wait for
            # the thread, even when the caller's block raised.
            try:
                writer.close()
            finally:
                t.join()
    except BrokenPipeError as pipe_error:
        # A broken pipe is usually a symptom of the consumer failing; surface
        # the root cause when we have one.
        if errors:
            raise errors[0] from pipe_error
        raise

    if errors:
        raise errors[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment