This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import epitran | |
def get_phoneme_string(epi,word): | |
phoneme_tuples = epi.word_to_tuples(word) | |
num = 0 | |
phoneme_list = [] | |
while num < len(phoneme_tuples): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
from collections import Counter | |
from pathlib import Path | |
import string | |
def word_frequency(text_directory: Path) -> Counter: | |
"""Get the word frequency table for a directory of text files.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
baba data - my father | |
baba iso - your father | |
baba ise - his/her father | |
mama mama - my mother | |
mama nyoko - your mother | |
mama nyina - his/her mother | |
kaka musaza - my brother(girl) | |
kaka mukuru - older brother(boy) | |
kaka murumuna - younger brother(boy) | |
dada mushiki - sister to a boy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gensim | |
from sklearn.manifold import TSNE | |
import matplotlib.pyplot as plt | |
model = gensim.models.KeyedVectors.load_word2vec_format('MUSE/dumped/debug/8gywilp7r1/vectors-rw1.txt') | |
model2 = gensim.models.KeyedVectors.load_word2vec_format('MUSE/dumped/debug/8gywilp7r1/vectors-lg1.txt') | |
lg_dict = { | |
"katonda": "god", | |
"emu": "one", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
from nltk.tokenize import word_tokenize | |
from nltk.text import Text | |
# read in text data | |
file = open("crawl-for-parallel-corpora/DataSet/luganda.txt", "r") | |
raw = file.read() | |
# tokenize | |
tokens = word_tokenize(raw) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.baseconverter; | |
import android.os.Bundle; | |
import android.app.Activity; | |
import android.content.Intent; | |
import android.view.View; | |
import android.widget.Button; | |
import android.widget.EditText; | |
import android.widget.TextView; |