Skip to content

Instantly share code, notes, and snippets.

@tin2tin
Last active August 15, 2024 14:42
Show Gist options
  • Save tin2tin/2ee71b898be894b6cd4cd17605d54f7b to your computer and use it in GitHub Desktop.
Save tin2tin/2ee71b898be894b6cd4cd17605d54f7b to your computer and use it in GitHub Desktop.
Download a GPT4All model file, compute its MD5 checksum, and produce the matching metadata JSON.
import requests
from tqdm import tqdm
from pathlib import Path
import hashlib
import json
def download_file_with_progress(url, save_path, timeout=30, chunk_size=8192):
    """
    Downloads a file from a URL with a progress bar.

    The data is streamed into a sibling "<name>.part" file and only renamed to
    save_path once the transfer has finished, so a failed or interrupted
    download never leaves a truncated file at save_path.

    Args:
    - url (str): The URL of the file to download.
    - save_path (Path): The local path where the file should be saved, including the filename.
    - timeout (float or tuple): Timeout in seconds passed to requests.get.
    - chunk_size (int): Number of bytes requested per streamed chunk.

    Raises:
    - requests.HTTPError: If the server answers with an error status.
    - requests.RequestException: On connection problems or timeouts.
    """
    save_path = Path(save_path)
    # Ensure the directory exists
    save_path.parent.mkdir(parents=True, exist_ok=True)
    partial_path = save_path.with_name(save_path.name + ".part")
    try:
        # The context manager releases the connection even if the loop fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            # Get the total file size from the headers
            total_size = int(response.headers.get('content-length', 0))
            with open(partial_path, "wb") as file, tqdm(
                # None (instead of 0) lets tqdm show a plain byte counter
                # when the server does not announce a size.
                total=total_size or None,
                unit='B',
                unit_scale=True,
                desc=str(save_path),
                initial=0,
                ascii=True,
            ) as pbar:
                # Write the response content to the file in chunks
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        file.write(chunk)
                        pbar.update(len(chunk))
        # Publish the finished file in one step.
        partial_path.replace(save_path)
    except BaseException:
        # Also covers Ctrl-C: drop the incomplete file, then re-raise untouched.
        try:
            partial_path.unlink()
        except FileNotFoundError:
            pass
        raise
    print(f"File downloaded successfully and saved to {save_path}")
def calculate_md5(file_path, chunk_size=1024 * 1024):
    """
    Calculates the MD5 checksum of a file.

    The file is read in fixed-size chunks so that arbitrarily large files can
    be hashed without loading them into memory.

    Args:
    - file_path (Path): The path of the file.
    - chunk_size (int): Number of bytes read per iteration; must be positive.

    Returns:
    - str: The MD5 checksum of the file as a lowercase hex string.

    Raises:
    - ValueError: If chunk_size is not positive.
    """
    # read(0) would end the loop immediately and read(-1) would load the
    # whole file at once, so reject both up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
def create_metadata_json(save_path, md5sum, filesize, url, *, overrides=None):
    """
    Creates a metadata JSON file with the given details.

    The file is written next to save_path, with the final suffix replaced by
    ".json". Fields that are not passed as arguments are filled with fixed
    defaults that describe a Llama 3.1 8B q8_0 model; use overrides to
    replace them for other models.

    Args:
    - save_path (Path): The local path of the downloaded file.
    - md5sum (str): The MD5 checksum of the file.
    - filesize (int): The size of the file in bytes.
    - url (str): The URL the file was downloaded from.
    - overrides (dict or None): Optional entries merged over the defaults.

    Returns:
    - Path: The path of the metadata JSON file that was written.
    """
    save_path = Path(save_path)
    data = {
        "order": "a",
        "md5sum": md5sum,
        "name": save_path.name,
        "filename": save_path.name,
        "filesize": str(filesize),
        "requires": "3.1.1",
        "ramrequired": "8",
        "parameters": "8 billion",
        "quant": "q8_0",
        "type": "LLaMA3",
        "description": "<ul><li>Fast responses</li><li>Chat based model</li><li>Large context size of 128k</li><li>Accepts agentic system prompts in Llama 3.1 format</li><li>Trained by Meta</li><li>License: <a href=\"https://llama.meta.com/llama3_1/license/\">Meta Llama 3.1 Community License</a></li></ul>",
        "url": url,
        # NOTE(review): these two templates look like they may have lost their
        # chat-template special tokens -- confirm against the model's format.
        "promptTemplate": "user\n\n%1assistant\n\n%2",
        "systemPrompt": "system\nCutting Knowledge Date: December 2023\n\nYou are a helpful assistant."
    }
    if overrides:
        data.update(overrides)
    # Save the JSON to a file
    metadata_path = save_path.with_suffix('.json')
    with open(metadata_path, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Metadata JSON created successfully at {metadata_path}")
    return metadata_path
# Main execution: fetch the model into the GPT4All cache, then write the
# JSON file describing it next to the download.
url = "https://huggingface.co/mradermacher/Danish-Mistral-Nemo-Base-GGUF/resolve/main/Danish-Mistral-Nemo-Base.Q8_0.gguf"
save_path = Path.home().joinpath('.cache', 'gpt4all', 'Danish-Mistral-Nemo-Base.Q8_0.gguf')

# Step 1: download the model file
download_file_with_progress(url, save_path)

# Step 2: collect the size and MD5 checksum of the downloaded file
filesize = save_path.stat().st_size
md5sum = calculate_md5(save_path)

# Step 3: write the metadata JSON file
create_metadata_json(save_path, md5sum, filesize, url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment