Skip to content

Instantly share code, notes, and snippets.

@Witty-Kitty
Last active April 11, 2023 15:42
Show Gist options
  • Save Witty-Kitty/96894aadfbe971997d27cdbb1d14196e to your computer and use it in GitHub Desktop.
Save Witty-Kitty/96894aadfbe971997d27cdbb1d14196e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
from xml.sax.saxutils import escape

import epitran
def get_phoneme_string(epi, word):
    """Return the transcription of *word* as space-separated segments.

    Args:
        epi: Object exposing ``word_to_tuples(word)`` (an ``epitran.Epitran``
            instance in this script).
        word: The orthographic word to transcribe.

    Returns:
        The element at index 3 of every tuple returned by
        ``epi.word_to_tuples(word)``, joined with single spaces. An empty
        tuple list yields an empty string.
    """
    # Index 3 is taken to be the phonetic form of each segment
    # (NOTE(review): per Epitran's documented tuple layout -- confirm
    # against the installed version).
    return ' '.join(segment[3] for segment in epi.word_to_tuples(word))
def main(argv=None):
    """Convert a word-frequency list into ``lexicon-default.xml``.

    Reads the file given by ``-f/--file/--frequencies-file``. Every non-blank
    line must hold a word followed by its frequency, separated by whitespace;
    any further fields on the line are ignored. Each word is transcribed via
    ``get_phoneme_string`` using an ``epitran.Epitran('swa-Latn')`` object and
    written as one ``<lex>`` element to ``lexicon-default.xml`` in the current
    working directory.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Raises:
        ValueError: If a non-blank line contains fewer than two fields.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--file",
        "--frequencies-file",
        required=True,
        help="path to .txt file containing words and their frequencies",
        type=str,
    )
    # Parse before building the Epitran object so --help and usage errors
    # do not depend on it.
    args = parser.parse_args(argv)
    epi = epitran.Epitran('swa-Latn')

    # Collect everything first: the output file is only opened (and
    # truncated) once the whole input has been read successfully.
    entries = []
    with open(args.file, 'r', encoding='utf-8') as source:
        for line_number, line in enumerate(source, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing empty line
            if len(fields) < 2:
                raise ValueError(
                    f"{args.file}:{line_number}: expected '<word> <frequency>', "
                    f"got {line.rstrip()!r}"
                )
            word, frequency = fields[0], fields[1]
            entries.append((word, get_phoneme_string(epi, word), frequency))

    # Attribute values are wrapped in double quotes below, so '"' must be
    # escaped in addition to the '&', '<' and '>' that escape() handles.
    attr_entities = {'"': '&quot;'}
    with open('lexicon-default.xml', 'w', encoding='utf-8') as target:
        target.write('<lexicon> \n')
        for word, pron, frequency in entries:
            target.write(
                f'<lex pron="{escape(pron, attr_entities)}" default="true" '
                f'freq="{escape(frequency, attr_entities)}">'
                f'{escape(word)}</lex> \n'
            )
        target.write('</lexicon>')
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment