Last active
December 19, 2022 02:39
-
-
Save Jackster/b7828669b4841d1f122aafc97a0e5f81 to your computer and use it in GitHub Desktop.
Python to translate ProjectTorque dialogue files into whatever language you want using AWS Translate SDK
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3
import html
import re

# Translate the <Line value="..." /> entries of a ProjectTorque dialogue file
# (crcen.dlg) from English to French using AWS Translate, and write the result
# to crcen.dlg.txt. Lines that do not match the pattern, and lines whose text
# is skipped (see the loop below), are copied through unchanged.

# Set your AWS access key and secret key
# NOTE(review): hard-coded credentials are easy to leak when this file is
# shared; consider letting boto3 resolve credentials on its own instead of
# filling these in -- confirm what fits your setup.
ACCESS_KEY = "your_access_key"
SECRET_KEY = "your_secret_key"

# Set the region you want to use
REGION = "eu-west-2"  # London region

# Create a boto3 session
session = boto3.session.Session(
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name=REGION,
)

# Create a client for the AWS Translate service
client = session.client("translate")

# Read the whole dialogue file.
# NOTE(review): no encoding is given here, so the platform default is used,
# while the output below is written as UTF-8 -- confirm the encoding of the
# input file.
with open('crcen.dlg', 'r') as file:
    contents = file.read()

# Matches a line containing <Line value="..." />, capturing whatever precedes
# the tag, the attribute text, and whatever follows the tag. Compiled once
# because it is applied to every line.
pattern = re.compile(r'(.*)<Line value="(.+?)" />(.*)')

# Output lines, in the same order as the input lines.
translated_lines = []

for line in contents.split('\n'):
    match = pattern.match(line)
    if not match:
        # Not a dialogue line: keep it exactly as it was.
        translated_lines.append(line)
        continue

    prefix, text, suffix = match.groups()

    # Decode character entities so the translator sees the actual symbols.
    decoded_text = html.unescape(text)

    # Text starting with '/' or containing angle brackets is left
    # untranslated; the original line is kept verbatim.
    if decoded_text.startswith('/') or '<' in decoded_text or '>' in decoded_text:
        translated_lines.append(line)
        continue

    # Translate the text from English to French.
    translation = client.translate_text(
        Text=decoded_text,
        SourceLanguageCode='en',
        TargetLanguageCode='fr',
    )
    translated_text = translation['TranslatedText']

    # Re-escape before putting the text back into the double-quoted
    # attribute: the input was unescaped above, so a raw '&', '<', '>' or '"'
    # in the translation would otherwise corrupt the value="..." attribute.
    # Apostrophes are left as-is because the attribute uses double quotes.
    escaped_text = html.escape(translated_text, quote=False).replace('"', '&quot;')

    translated_lines.append(f'{prefix}<Line value="{escaped_text}" />{suffix}')

# Write the translated lines to the new file as UTF-8.
with open('crcen.dlg.txt', 'w', encoding="utf-8") as file:
    file.write('\n'.join(translated_lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This code was generated using ChatGPT. While it "works", it does not handle some characters very well.
A lot of the text is also chat commands, or contains character entities that either Python or AWS will convert into the actual symbol.