Skip to content

Instantly share code, notes, and snippets.

@sheriffff
Last active August 29, 2024 08:41
Show Gist options
  • Save sheriffff/60dcde55c5ddff3776bf70d76d25ec62 to your computer and use it in GitHub Desktop.
Save sheriffff/60dcde55c5ddff3776bf70d76d25ec62 to your computer and use it in GitHub Desktop.
Jupyter notebook translator

Usage

Run `python jupyter_translate.py`; you will be prompted for three inputs:

  • path to notebook
  • source language
  • target language

A new notebook in the target language will be created in the same directory as the original notebook.
It also keeps the original-language cells, which makes it easier for a human to debug the translation.

You need a DeepL API key, saved in a file named my_key.txt in the directory you run the script from.

import json
import os
import sys
import deepl
import readline
# Read the DeepL API key from a local file. The context manager closes the
# handle right away instead of leaving it open until garbage collection.
with open("./my_key.txt", "r") as key_file:
    my_key = key_file.read().strip()

# Shared DeepL client used by deepl_translate().
translator = deepl.Translator(my_key)
def needs_translate(cell):
    """Decide whether a notebook cell should be sent for translation.

    A cell is skipped when it is a table-of-contents cell, a code cell, or
    a cell whose text starts with "<". Every other cell is translated.
    """
    skip = is_toc_cell(cell) or is_code_cell(cell) or is_html_cell(cell)
    return not skip
def get_cell_text(cell):
    """Return the full text of a notebook cell as a single string.

    The notebook format stores ``source`` either as one string or as a list
    of strings that already carry their own line endings, so list entries
    are concatenated as-is. Joining them with an extra newline would double
    every line break, and would interleave newlines between the characters
    of a plain-string source.
    """
    source = cell["source"]
    if isinstance(source, str):
        return source
    return "".join(source)
def is_latex_cell(cell):
    """Tell whether the cell's text begins with a "$" character."""
    return get_cell_text(cell).startswith("$")
def is_html_cell(cell):
    """Tell whether the cell's text begins with a "<" character."""
    return get_cell_text(cell).startswith("<")
def is_toc_cell(cell):
    """Return the cell's ``toc`` metadata entry, or None when it has none."""
    metadata = cell["metadata"]
    return metadata.get("toc")
def is_code_cell(cell):
    """Tell whether the cell's ``cell_type`` is "code"."""
    cell_type = cell["cell_type"]
    return cell_type == "code"
def deepl_translate(text, from_lang, to_lang):
    """Translate ``text`` with the module-level DeepL ``translator``.

    Args:
        text: Text to translate.
        from_lang: Source language, passed to DeepL as ``source_lang``.
        to_lang: Target language, passed to DeepL as ``target_lang``.

    Returns:
        The translated text. Blank input is returned unchanged.
    """
    # An empty or whitespace-only cell has nothing to translate, so skip the
    # API round-trip entirely.
    if not text.strip():
        return text

    result = translator.translate_text(text, source_lang=from_lang, target_lang=to_lang)
    return result.text
def generate_translated_notebook(source_lang, target_lang, filename):
    """Write a translated copy of a notebook next to the original file.

    Every original cell is kept. Each cell accepted by ``needs_translate``
    is followed by a copy whose source is its DeepL translation. The result
    is saved as ``<name>_<target_lang>.ipynb``.

    Args:
        source_lang: Language code of the notebook's text.
        target_lang: Language code to translate into.
        filename: Path to the notebook to translate.
    """
    import uuid  # local import: only needed to give duplicated cells new ids

    with open(filename, "rb") as f:
        notebook = json.load(f)

    cells = notebook["cells"]

    # Clone the old notebook (keeps its top-level metadata) and reset cells.
    new_notebook = notebook.copy()
    new_notebook["cells"] = []

    n_translations = 0
    for i, original_cell in enumerate(cells):
        if i % 10 == 0:
            print(f"Evaluating cell #{i}")

        new_notebook["cells"].append(original_cell)
        if not needs_translate(original_cell):
            continue

        n_translations += 1
        new_cell = original_cell.copy()
        # Cell ids must be unique within a notebook, so the translated copy
        # cannot reuse the id of the cell it was cloned from.
        if "id" in new_cell:
            new_cell["id"] = uuid.uuid4().hex[:8]
        original_text = get_cell_text(original_cell)
        new_cell["source"] = deepl_translate(original_text, source_lang, target_lang)
        new_notebook["cells"].append(new_cell)

    # Report against the real cell count: the loop index is one short of it
    # and is undefined when the notebook has no cells.
    print(f"Translated {n_translations}/{len(cells)} cells")

    # Strip only a trailing ".ipynb"; splitting on the first occurrence
    # truncates paths such as ".ipynb_checkpoints/nb.ipynb".
    suffix = ".ipynb"
    base = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    filepath_translated = f"{base}_{target_lang}.ipynb"

    with open(filepath_translated, "w", encoding="utf-8") as f:
        # Keep translated text readable instead of \u-escaping every
        # non-ASCII character.
        json.dump(new_notebook, f, indent=2, ensure_ascii=False)

    # Announce the path only after the file was written successfully.
    print(f"You can find translated notebook here: {filepath_translated}")
if __name__ == "__main__":
    # Enable tab completion while typing at the prompts.
    readline.set_completer_delims(' \t\n=')
    readline.parse_and_bind("tab: complete")

    # Strip surrounding whitespace so a stray space typed or pasted around
    # the answer does not make an existing path look missing.
    filename = input("File path: ").strip()
    print(f"You entered: {filename}")
    if not os.path.exists(filename):
        print("That file path does not exist")
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)

    source_lang = input("Source language: ").strip()
    target_lang = input("Target language: ").strip()
    generate_translated_notebook(source_lang, target_lang, filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment