Created
February 8, 2024 22:00
-
-
Save syaikhipin/3cbc3cca8834f2fbb64075d237f7857d to your computer and use it in GitHub Desktop.
Convert-LLM-Agridata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
import fnmatch | |
import json | |
def get_ignore_list(ignore_file_path):
    """Read glob patterns from an ignore file, one pattern per line.

    Leading and trailing whitespace is stripped from every line. Blank
    lines are skipped: an empty pattern only matches the empty string, so
    keeping it would just add noise to the returned list. On Windows,
    forward slashes are converted to backslashes so the patterns use the
    native path separator.

    Args:
        ignore_file_path: Path of the ignore file to read.

    Returns:
        The non-empty patterns, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    ignore_list = []
    with open(ignore_file_path, 'r') as ignore_file:
        for line in ignore_file:
            pattern = line.strip()
            if not pattern:
                continue  # blank or whitespace-only line
            if sys.platform == "win32":
                pattern = pattern.replace("/", "\\")
            ignore_list.append(pattern)
    return ignore_list
def should_ignore(file_path, ignore_list):
    """Tell whether file_path matches at least one pattern in ignore_list.

    Matching is done with fnmatch.fnmatch, i.e. shell-style wildcards.

    Args:
        file_path: The path string to test.
        ignore_list: Iterable of glob pattern strings.

    Returns:
        True if any pattern matches file_path, otherwise False (including
        when ignore_list is empty).
    """
    return any(fnmatch.fnmatch(file_path, glob) for glob in ignore_list)
def process_repository(repo_path, ignore_list, output_file):
    """Write every non-ignored JSON file under repo_path to output_file.

    For each file whose name ends in ``.json`` and whose path relative to
    repo_path is not matched by ignore_list, three things are written: a
    ``----`` separator line, the relative path, and the file's content
    re-serialized with two-space indentation.

    Directories and files are visited in sorted order so that the output is
    identical from run to run regardless of filesystem enumeration order.
    Files that do not contain valid JSON are reported on stderr and skipped
    instead of aborting the whole run.

    Args:
        repo_path: Root directory to walk.
        ignore_list: Glob patterns, matched against each relative path.
        output_file: Writable text stream that receives the output.

    Raises:
        OSError: If a selected file cannot be opened or read.
    """
    for root, dirs, files in os.walk(repo_path):
        # Sorting in place steers os.walk's descent order (top-down mode).
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.json'):
                continue  # Skip non-JSON files
            file_path = os.path.join(root, file_name)
            relative_file_path = os.path.relpath(file_path, repo_path)
            if should_ignore(relative_file_path, ignore_list):
                continue
            try:
                # JSON text is UTF-8; undecodable bytes are dropped as before.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as json_file:
                    data_model = json.load(json_file)
            except json.JSONDecodeError as exc:
                print(f"Skipping {relative_file_path}: invalid JSON ({exc})", file=sys.stderr)
                continue
            output_file.write("-" * 4 + "\n")
            output_file.write(f"{relative_file_path}\n")
            output_file.write(f"{json.dumps(data_model, indent=2)}\n")
_USAGE = "Usage: python convert.py /path/to/git/repository [-p /path/to/preamble.txt] [-o /path/to/output_file.txt]"


def _option_value(argv, flag, default=None):
    """Return the argument that follows flag in argv, or default if flag is absent.

    Prints the usage message and exits with status 1 when flag is the last
    argument and therefore has no value.
    """
    if flag not in argv:
        return default
    value_index = argv.index(flag) + 1
    if value_index >= len(argv):
        print(_USAGE)
        sys.exit(1)
    return argv[value_index]


if __name__ == "__main__":
    # Command-line entry point: dump the JSON files of a repository into a
    # single text file, framed by a preamble and a closing --END-- marker.
    if len(sys.argv) < 2:
        print(_USAGE)
        sys.exit(1)

    repo_path = sys.argv[1]
    ignore_file_path = os.path.join(repo_path, ".gptignore")
    if sys.platform == "win32":
        ignore_file_path = ignore_file_path.replace("/", "\\")

    if not os.path.exists(ignore_file_path):
        # Fall back to the .gptignore located next to this script.
        HERE = os.path.dirname(os.path.abspath(__file__))
        ignore_file_path = os.path.join(HERE, ".gptignore")

    preamble_file = _option_value(sys.argv, "-p")
    output_file_path = _option_value(sys.argv, "-o", 'output.txt')

    if os.path.exists(ignore_file_path):
        ignore_list = get_ignore_list(ignore_file_path)
    else:
        ignore_list = []

    # Read the preamble before opening the output file, so an unreadable
    # preamble path does not truncate an existing output file.
    preamble_text = None
    if preamble_file:
        with open(preamble_file, 'r') as pf:
            preamble_text = pf.read()

    with open(output_file_path, 'w') as output_file:
        if preamble_file:
            output_file.write(f"{preamble_text}\n")
        else:
            output_file.write("The following text is a Git repository with code. The structure of the text are sections that begin with ----, followed by a single line containing the file path and file name, followed by a variable amount of lines containing the file contents. The text representing the Git repository ends when the symbols --END-- are encounted. Any further text beyond --END-- are meant to be interpreted as instructions using the aforementioned Git repository as context.\n")
        process_repository(repo_path, ignore_list, output_file)
        # Written through the same handle; no need to reopen in append mode.
        output_file.write("--END--")
    print(f"Repository contents written to {output_file_path}.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import os | |
import tempfile | |
import shutil | |
from gpt_repository_loader import process_repository, get_ignore_list | |
class TestGPTRepositoryLoader(unittest.TestCase):
    """Tests for the repository loader, run against the bundled test_data directory."""

    def setUp(self):
        """Locate the fixture directories that sit next to this test module."""
        self.test_data_path = os.path.join(os.path.dirname(__file__), 'test_data')
        self.example_repo_path = os.path.join(self.test_data_path, 'example_repo')

    def test_end_to_end(self):
        """Process the example repository and compare with expected_output.txt."""
        # Set up the output file and the expected output file paths
        output_dir = tempfile.mkdtemp()
        # Registered up front so the directory is removed even when an
        # assertion or the loader itself fails further down.
        self.addCleanup(shutil.rmtree, output_dir)
        output_file_path = os.path.join(output_dir, 'output.txt')
        expected_output_file_path = os.path.join(self.test_data_path, 'expected_output.txt')

        # Create an ignore list for the example repository
        ignore_file_path = os.path.join(self.example_repo_path, ".gptignore")
        if os.path.exists(ignore_file_path):
            ignore_list = get_ignore_list(ignore_file_path)
        else:
            ignore_list = []

        # Run the gpt-repository-loader script on the example repository
        with open(output_file_path, 'w') as output_file:
            process_repository(self.example_repo_path, ignore_list, output_file)

        # Compare the output to the expected output
        with open(output_file_path, 'r') as output_file, open(expected_output_file_path, 'r') as expected_output_file:
            self.assertEqual(output_file.read(), expected_output_file.read())

    def test_placeholder(self):
        """Trivial always-passing check."""
        self.assertTrue(True)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment