Skip to content

Instantly share code, notes, and snippets.

@Cyberes
Last active June 22, 2023 03:04
Show Gist options
  • Save Cyberes/f276da3f5f49e981244a8330d74f940d to your computer and use it in GitHub Desktop.
Save Cyberes/f276da3f5f49e981244a8330d74f940d to your computer and use it in GitHub Desktop.
Convert SillyTavern jsonl chats to TXT files
#!/usr/bin/env python3
import argparse
import re
from pathlib import Path
import sys
import json
"""
Convert SillyTavern jsonl chats to TXT files.
HOW TO USE:
1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`.
2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>`.
These things are stripped from the chat:
- OOC chat like this: (OOC: bla bla)
- Text between brackets: [bla bla bla]
This script isn't going to produce a perfect transcript, but it's close enough.
"""
# Pattern for text that must not end up in the transcript:
#   1. OOC remarks such as "(OOC: ...)" or "[OOC: ...]" anywhere in a line.
#   2. A whole line that starts with "[" and ends with "]".
# For (2) the newline in front of the line is only inspected through a
# lookbehind and never consumed; just the trailing newline is removed.
# Consuming both would glue the previous and the next line together, and
# would also stop the second of two consecutive bracket lines from matching.
cleaner_re = re.compile(r'[(\[]OOC:.*?[\])]|(?:^|(?<=\n))\[.*?\](?:$|\n)')
def main():
    """Convert a SillyTavern ``jsonl`` chat log into a plain-text transcript.

    Two positional command-line arguments are read: the path of the jsonl
    chat file and the path of the TXT file to create. Every record that has
    a ``mes`` field is cleaned with the module-level ``cleaner_re`` pattern;
    records whose cleaned text is empty are dropped. Each remaining message
    is written as ``<name>:``, the message text, and a blank line.

    Exits with status 1 (through ``sys.exit``) when the input file does not
    exist, the output's parent directory does not exist, or a non-blank line
    of the input is not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args()

    input_jsonl = Path(args.filepath).expanduser().absolute().resolve()
    output_txt = Path(args.output_txt).expanduser().absolute().resolve()

    print('Converting chat:', input_jsonl)

    if not input_jsonl.exists():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    raw = input_jsonl.read_text(encoding='utf-8').splitlines()

    chatlines = []
    for line_no, line in enumerate(raw, start=1):
        # A blank line carries no record; skip it instead of aborting.
        if not line.strip():
            continue
        try:
            chatlines.append(json.loads(line))
        except json.JSONDecodeError as err:
            print(f'JSON decode error on line {line_no}:')
            print(err)
            sys.exit(1)

    formatted_chat = []
    for msg in chatlines:
        # Records without a 'mes' field are not chat messages.
        if 'mes' not in msg:
            continue
        clean_str = re.sub(cleaner_re, '', msg['mes']).strip().replace('\n\n', '\n').strip('\n')
        if not clean_str:
            continue
        formatted_chat.append({'name': msg['name'], 'msg': clean_str})

    # Write with the same explicit encoding that was used for reading.
    with open(output_txt, 'w', encoding='utf-8') as f:
        f.writelines(f"{msg['name']}:\n{msg['msg']}\n\n" for msg in formatted_chat)

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment