Skip to content

Instantly share code, notes, and snippets.

@mattwang44
Created July 28, 2024 06:59
Show Gist options
  • Save mattwang44/51f97e56b695758003117db6bac05b31 to your computer and use it in GitHub Desktop.
Save mattwang44/51f97e56b695758003117db6bac05b31 to your computer and use it in GitHub Desktop.
Visualize zh_TW translation progress of Python official docs
"""Visualize zh_TW translation progress of Python official docs
Place this script in the project root of the repo and run it from there.
ref: https://github.com/python/python-docs-zh-tw
"""
import logging
from pathlib import Path
import pandas as pd
import polib
from git import Repo
# Directory containing this script.  The repository is located through the
# script's own path, so the script is expected to live in the root of the
# checkout.  ``Path(__file__)`` alone is the script *file*, which is neither
# a usable repository path for ``Repo`` nor a directory that can be searched
# for .po files.
root_path = Path(__file__).resolve().parent
repo = Repo(root_path)

# Remember where HEAD was so it can be restored after walking the history.
# Keep the branch itself when HEAD is attached to one, so that checking it
# out again does not leave the repository in a detached-HEAD state; fall back
# to the bare commit when HEAD is already detached.
current_head = repo.head.commit if repo.head.is_detached else repo.active_branch

# The history that gets walked is the one reachable from the 3.12 branch.
repo.git.checkout('3.12')
def _get_po_paths(root: Path) -> list[Path]:
    """Find all .po files in the given path.

    Args:
        root: Either a single file or a directory that is searched
            recursively.

    Returns:
        Resolved paths of the .po files found.  When ``root`` is a file, it
        is returned as the only element.  When ``root`` does not exist, an
        error is logged and an empty list is returned.  Files located inside
        a ``.git`` directory below ``root`` are skipped.
    """
    if not root.exists():
        logging.error("The path '%s' does not exist!", root.absolute())
        return []
    if root.is_file():
        return [root.resolve()]
    return [
        p.resolve()
        for p in root.glob("**/*.po")
        # Compare whole path components relative to ``root`` so that only
        # real ``.git`` directories are excluded -- not ``.github``, and not
        # everything when the checkout's own location contains ".git".
        if '.git' not in p.relative_to(root).parts
    ]
def _get_progress() -> float:
    """Compute the translation progress of the current working tree.

    Every .po file found under ``root_path`` is parsed with polib.  Progress
    is the number of translated entries relative to the number of entries
    that are not obsolete.

    Returns:
        Percentage of translated entries, rounded to two decimals.  ``0.0``
        when no entries were counted (no .po files found, or none of them
        could be parsed), instead of failing with a division by zero.
    """
    po_files = _get_po_paths(Path(root_path).resolve())
    errors = []
    num_translated = 0
    num_entries = 0
    for path in po_files:
        try:
            pofile = polib.pofile(path)
        except OSError:
            errors.append(f"{path} doesn't seem to be a .po file")
            continue
        # Obsolete entries are excluded from the denominator.
        num_entries += len(pofile) - len(pofile.obsolete_entries())
        num_translated += len(pofile.translated_entries())

    # Report unreadable files before computing the result so the report is
    # not lost when there is nothing to divide by.
    if errors:
        print(errors)
    if not num_entries:
        return 0.0
    return round(100 * num_translated / num_entries, 2)
if __name__ == '__main__':
    # Maps each sampled commit to (commit timestamp in seconds, progress in %).
    memo = {}
    prev_commit_time = None
    # Snapshot the history once, before HEAD starts moving between commits.
    commits = list(repo.iter_commits())
    try:
        for commit in commits:
            commit_time = commit.committed_datetime
            # Sample at most one commit per calendar day: a commit dated the
            # same day as the previously sampled one is skipped.
            if prev_commit_time and commit_time.date() == prev_commit_time.date():
                print(commit_time, commit, 'skipped')
                continue
            print(commit_time, commit)
            prev_commit_time = commit_time
            repo.git.checkout(commit)
            try:
                progress = _get_progress()
                memo[commit] = (commit_time.timestamp(), progress)
            except Exception as e:
                # Best effort: a commit that cannot be measured is reported
                # and left out of the plot instead of aborting the walk.
                print(e)
    finally:
        # Always put HEAD back where it was, even when a checkout fails or
        # the walk is interrupted half-way.
        repo.git.checkout(current_head)

    if not memo:
        # Building the frame below needs at least one sample.
        print('No progress data collected; nothing to plot.')
    else:
        # plot
        df = pd.DataFrame(list(memo.values()), columns=['timestamp', 'progress'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        df['progress'] = df['progress'].astype(float)
        df['date'] = df['timestamp'].dt.date
        # NOTE(review): the figure is neither shown nor saved here; outside an
        # interactive/notebook session it presumably needs an explicit
        # show/savefig call to become visible -- confirm intended usage.
        ax = df.plot(x='date', y='progress', title='Translation progress based on commit history')
        ax.set_xlabel('Date')
        ax.set_ylabel('Progress (%)')
        ax.grid()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment