Created
October 17, 2019 01:29
-
-
Save raphaelmerx/aa9b69b00b8cb0c8a0408a0162399c86 to your computer and use it in GitHub Desktop.
Convert Zawgyi-encoded text in django.po to Unicode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# coding: utf-8
"""Convert Zawgyi-encoded ``msgstr`` values in a Django .po file to Unicode.

The workflow has two phases, separated by a run of ``detect_zawgyi.js``:

1. Every single-line ``msgstr "..."`` value is extracted from the .po file
   and written, one per line, to ``myanmar_text.txt``.
2. ``detect_zawgyi.js`` reads that file and writes ``zawgyiToUnicode.json``,
   a mapping from original text to converted text.  This script then rewrites
   each matching ``msgstr`` line and saves the result as ``django.po`` in the
   current working directory.

Only lines of the exact form ``msgstr "..."`` are considered; plural forms
(``msgstr[0]``) and continuation lines of multi-line strings are left as-is.
"""
import json
import os
import re
import sys

PO_PATH = 'projectbank/locale/my/LC_MESSAGES/django.po'
TEXT_DUMP_PATH = 'myanmar_text.txt'
MAPPING_PATH = 'zawgyiToUnicode.json'
OUTPUT_PATH = 'django.po'

FIND_MSGSTR = re.compile(r'msgstr "(.*)"')


def extract_msgstrs(lines):
    """Return the quoted content of every ``msgstr "..."`` line in ``lines``."""
    found = []
    for line in lines:
        match = FIND_MSGSTR.search(line)
        if match is not None:
            found.append(match.group(1))
    return found


def convert_line(line, zawgyi_to_unicode):
    """Return ``line`` with its msgstr content replaced via ``zawgyi_to_unicode``.

    Lines that are not ``msgstr "..."`` lines, or whose content is not a key
    of the mapping, are returned unchanged.
    """
    match = FIND_MSGSTR.search(line)
    if match is None or match.group(1) not in zawgyi_to_unicode:
        return line
    replacement = 'msgstr "{}"'.format(zawgyi_to_unicode[match.group(1)])
    # Use a callable so the replacement is inserted literally.  A plain string
    # would be treated as a regex template, and .po escapes such as a literal
    # backslash-n would be expanded into real newlines (or rejected).
    return FIND_MSGSTR.sub(lambda _match: replacement, line)


def main():
    """Run the extraction phase and, if the mapping exists, the rewrite phase."""
    # Always read and write UTF-8 explicitly; the locale default may not be
    # able to represent Burmese text.
    with open(PO_PATH, encoding='utf-8') as f:
        original_lines = f.readlines()

    with open(TEXT_DUMP_PATH, 'w', encoding='utf-8') as f:
        f.write('\n'.join(extract_msgstrs(original_lines)))

    # ### interlude: detect_zawgyi.js generated zawgyiToUnicode.json
    if not os.path.exists(MAPPING_PATH):
        sys.exit(
            '{} not found: run detect_zawgyi.js on {} and then re-run this '
            'script.'.format(MAPPING_PATH, TEXT_DUMP_PATH)
        )
    with open(MAPPING_PATH, encoding='utf-8') as f:
        zawgyi_to_unicode = json.load(f)

    new_lines = [convert_line(line, zawgyi_to_unicode) for line in original_lines]
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(''.join(new_lines))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// detect_zawgyi.js
//
// Reads myanmar_text.txt (one string per line, located next to this script),
// asks the myanmar-tools detector for a Zawgyi probability for each line, and
// for every line scoring above the threshold stores its converted form.
// The resulting { original: converted } mapping is written as JSON to
// zawgyiToUnicode.json in the current working directory.
const google_myanmar_tools = require("myanmar-tools");
const fs = require('fs');
const path = require('path');

const detector = new google_myanmar_tools.ZawgyiDetector();
const converter = new google_myanmar_tools.ZawgyiConverter();

// Lines are only converted when the detector's score exceeds this value.
const ZAWGYI_THRESHOLD = 0.95;

const filePath = path.join(__dirname, 'myanmar_text.txt');
const zawgyiToUnicode = {};

fs.readFile(filePath, { encoding: 'utf-8' }, function (err, data) {
  if (err) {
    // Fail loudly instead of crashing later on `undefined.split`.
    throw err;
  }

  // Accept both LF and CRLF line endings so that keys never carry a stray
  // trailing '\r' that would prevent them from matching the source strings.
  const lines = data.split(/\r?\n/);
  for (const stringInput of lines) {
    const score = detector.getZawgyiProbability(stringInput);
    if (score > ZAWGYI_THRESHOLD) {
      zawgyiToUnicode[stringInput] = converter.zawgyiToUnicode(stringInput);
    }
  }

  const json = JSON.stringify(zawgyiToUnicode);
  fs.writeFile('zawgyiToUnicode.json', json, 'utf8', function (writeErr) {
    if (writeErr) {
      // Do not report success when the mapping could not be written.
      throw writeErr;
    }
    console.log('done');
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment