Skip to content

Instantly share code, notes, and snippets.

@syaikhipin
Created February 8, 2024 22:00
Show Gist options
  • Save syaikhipin/3cbc3cca8834f2fbb64075d237f7857d to your computer and use it in GitHub Desktop.
Convert-LLM-Agridata
#!/usr/bin/env python3
import os
import sys
import fnmatch
import json
def get_ignore_list(ignore_file_path):
    """Read the ignore patterns listed in a ``.gptignore``-style file.

    Every line is stripped of surrounding whitespace and returned as one
    pattern, in file order. Blank lines and lines whose first non-blank
    character is ``#`` are skipped (treated as spacing/comments, as in
    ``.gitignore``) so they do not end up in the pattern list. On Windows,
    forward slashes in a pattern are converted to backslashes.

    Args:
        ignore_file_path: Path of the ignore file to read.

    Returns:
        A list of pattern strings.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The platform cannot change while the file is being read; check it once.
    is_windows = sys.platform == "win32"
    ignore_list = []
    with open(ignore_file_path, 'r') as ignore_file:
        for line in ignore_file:
            pattern = line.strip()
            if not pattern or pattern.startswith("#"):
                continue  # spacing or comment line, not a pattern
            if is_windows:
                pattern = pattern.replace("/", "\\")
            ignore_list.append(pattern)
    return ignore_list
def should_ignore(file_path, ignore_list):
    """Tell whether a path is excluded by any of the ignore patterns.

    Args:
        file_path: Path string to test.
        ignore_list: Iterable of ``fnmatch``-style glob patterns.

    Returns:
        True if ``fnmatch.fnmatch`` accepts *file_path* for at least one
        pattern, otherwise False (including when *ignore_list* is empty).
    """
    return any(fnmatch.fnmatch(file_path, glob) for glob in ignore_list)
def process_repository(repo_path, ignore_list, output_file):
    """Write every non-ignored JSON file under *repo_path* to *output_file*.

    The tree is walked with ``os.walk``. For each file whose name ends in
    ``.json`` and whose path relative to *repo_path* is not matched by
    ``should_ignore``, three things are written: a ``----`` separator line,
    the relative path, and the parsed JSON re-serialised with a two-space
    indent.

    Directories and files are visited in sorted order so the output is
    reproducible across filesystems. A file that does not contain valid JSON
    is reported on stderr and skipped instead of aborting the whole run.

    Args:
        repo_path: Root directory to scan.
        ignore_list: Patterns passed to ``should_ignore``.
        output_file: Writable text file object that receives the output.

    Raises:
        OSError: If a JSON file cannot be opened or read.
    """
    for root, dirs, files in os.walk(repo_path):
        # Sorting ``dirs`` in place makes os.walk descend in a stable order.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.json'):
                continue  # Skip non-JSON files
            file_path = os.path.join(root, file_name)
            relative_file_path = os.path.relpath(file_path, repo_path)
            if should_ignore(relative_file_path, ignore_list):
                continue
            try:
                # errors='ignore' drops undecodable bytes rather than failing.
                with open(file_path, 'r', errors='ignore') as json_file:
                    data_model = json.load(json_file)
            except json.JSONDecodeError as exc:
                print(
                    f"Skipping {relative_file_path}: invalid JSON ({exc})",
                    file=sys.stderr,
                )
                continue
            # Nothing is written for a file until it has parsed successfully,
            # so a skipped file leaves no dangling section header.
            output_file.write("-" * 4 + "\n")
            output_file.write(f"{relative_file_path}\n")
            output_file.write(f"{json.dumps(data_model, indent=2)}\n")
if __name__ == "__main__":
    # Command-line entry point: dump the JSON files of a repository into one
    # text file, preceded by a preamble and terminated by an --END-- marker.
    usage = "Usage: python convert.py /path/to/git/repository [-p /path/to/preamble.txt] [-o /path/to/output_file.txt]"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    def _option_value(flag, default):
        """Return the argument following *flag* in sys.argv, or *default*."""
        if flag not in sys.argv:
            return default
        value_index = sys.argv.index(flag) + 1
        if value_index >= len(sys.argv):
            # The flag was the last argument; report it instead of letting
            # the lookup fail with an IndexError.
            print(f"Error: option {flag} requires a path argument.")
            print(usage)
            sys.exit(1)
        return sys.argv[value_index]

    repo_path = sys.argv[1]

    ignore_file_path = os.path.join(repo_path, ".gptignore")
    if sys.platform == "win32":
        ignore_file_path = ignore_file_path.replace("/", "\\")
    if not os.path.exists(ignore_file_path):
        # Fall back to the .gptignore file that sits next to this script.
        HERE = os.path.dirname(os.path.abspath(__file__))
        ignore_file_path = os.path.join(HERE, ".gptignore")

    preamble_file = _option_value("-p", None)
    output_file_path = _option_value("-o", 'output.txt')

    if os.path.exists(ignore_file_path):
        ignore_list = get_ignore_list(ignore_file_path)
    else:
        ignore_list = []

    # Read the preamble before the output file is opened for writing, so an
    # unreadable preamble path does not leave a truncated output file behind.
    if preamble_file:
        with open(preamble_file, 'r') as pf:
            preamble_text = pf.read()
    else:
        preamble_text = "The following text is a Git repository with code. The structure of the text are sections that begin with ----, followed by a single line containing the file path and file name, followed by a variable amount of lines containing the file contents. The text representing the Git repository ends when the symbols --END-- are encountered. Any further text beyond --END-- are meant to be interpreted as instructions using the aforementioned Git repository as context."

    # A single handle writes preamble, repository contents and end marker.
    with open(output_file_path, 'w') as output_file:
        output_file.write(f"{preamble_text}\n")
        process_repository(repo_path, ignore_list, output_file)
        output_file.write("--END--")
    print(f"Repository contents written to {output_file_path}.")
import unittest
import os
import tempfile
import shutil
from gpt_repository_loader import process_repository, get_ignore_list
class TestGPTRepositoryLoader(unittest.TestCase):
    """Tests for ``process_repository`` and ``get_ignore_list``.

    Fixtures are read from a ``test_data`` directory located next to this
    test module.
    """

    def setUp(self):
        """Resolve the fixture directory and the example repository path."""
        self.test_data_path = os.path.join(os.path.dirname(__file__), 'test_data')
        self.example_repo_path = os.path.join(self.test_data_path, 'example_repo')

    def test_end_to_end(self):
        """Process the example repository and compare with the expected text."""
        # Register removal of the scratch directory up front so it is deleted
        # even when the comparison below fails.
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, temp_dir)

        # Set up the output file and the expected output file paths
        output_file_path = os.path.join(temp_dir, 'output.txt')
        expected_output_file_path = os.path.join(self.test_data_path, 'expected_output.txt')

        # Create an ignore list for the example repository
        ignore_file_path = os.path.join(self.example_repo_path, ".gptignore")
        if os.path.exists(ignore_file_path):
            ignore_list = get_ignore_list(ignore_file_path)
        else:
            ignore_list = []

        # Run the gpt-repository-loader script on the example repository
        with open(output_file_path, 'w') as output_file:
            process_repository(self.example_repo_path, ignore_list, output_file)

        # Compare the output to the expected output
        with open(output_file_path, 'r') as output_file, open(expected_output_file_path, 'r') as expected_output_file:
            self.assertEqual(output_file.read(), expected_output_file.read())

    def test_placeholder(self):
        """Trivial always-passing test."""
        self.assertTrue(True)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment