Created
February 23, 2018 10:09
-
-
Save statguy/8ca45b9c22c8ce207616dd6679eb0a8c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1. Copy the table from http://thai-language.com/ref/starred
# 2. Paste it into Google Sheets
# 3. Save it as TSV (tab-separated values); this script reads "Top 1000 Thai words - Sheet1.tsv"
# 4. Run it through top_1000_thai_words.R (this script)
# 5. Upload the result to AnkiApp via https://api.ankiapp.com/nexus/
# Attach the tidyverse (pipe, dplyr/tidyr verbs) and stringr string helpers.
library(tidyverse)
library(stringr)

# Input and output files below are given as bare file names, so they are
# resolved relative to this directory.
setwd("~/Downloads")

# Read the tab-separated sheet export. There is no header row, so the columns
# get R's default names (V1, V2, ...). Quoting is disabled so that quote
# characters inside the cells are kept as literal text.
words <- read.delim(
  "Top 1000 Thai words - Sheet1.tsv",
  header = FALSE,
  quote = "",
  stringsAsFactors = FALSE
)
# Convert one transliterated syllable with a trailing tone letter into the same
# syllable with tone diacritics on its vowels.
#
# The last character of `x` is treated as the tone marker and is always
# removed. For the markers H, L, F and R every lowercase vowel (a, e, i, o, u)
# in the remaining text receives the matching accent:
#   H -> acute, L -> grave, F -> circumflex, R -> caron.
# Any other final character (presumably "M" for mid tone -- confirm against the
# source table) is dropped and the vowels are left unaccented.
#
# x: a single string such as "maiF". NA is returned unchanged.
# Returns: a single string.
set_tone_word <- function(x) {
  # An NA would otherwise make the tone comparison fail with
  # "missing value where TRUE/FALSE needed".
  if (is.na(x)) {
    return(x)
  }

  # Accented replacements for "aeiou", in that order, keyed by tone letter.
  # The rising-tone row uses o-with-caron (U+01D2) so it matches the other
  # caron vowels in that row.
  tone_vowels <- c(
    H = "áéíóú",
    L = "àèìòù",
    F = "âêîôû",
    R = "ǎěǐǒǔ"
  )

  len <- nchar(x)
  tone <- substr(x, len, len)
  word <- substr(x, 1, len - 1)

  if (tone %in% names(tone_vowels)) {
    chartr("aeiou", tone_vowels[[tone]], word)
  } else {
    word
  }
}
# Apply tone diacritics to every syllable of each transliteration in `x`.
#
# Each element of `x` is split on single spaces, every piece is converted with
# set_tone_word(), and the converted pieces are joined back together with "-".
#
# x: a character vector of space-separated syllables, each ending in a tone
#    letter (e.g. "maiF benM").
# Returns: a character vector of the same length as `x`. Returning an atomic
#    vector (rather than a list of strings) keeps the column a plain character
#    column when this is used inside mutate().
set_tone <- function(x) {
  syllables_per_entry <- str_split(x, " ")
  vapply(
    syllables_per_entry,
    function(syllables) {
      marked <- vapply(
        syllables,
        set_tone_word,
        character(1),
        USE.NAMES = FALSE
      )
      str_c(marked, collapse = "-")
    },
    character(1)
  )
}
# Build the flashcard table:
#   thai    = "<V1> = <tone-marked V2>"
#   english = V3
# V1 is presumably the Thai-script word and V2 its transliteration -- confirm
# against the sheet. unite() drops V1 and V2 after combining them, so the
# result has exactly the two columns `thai` and `english`, in that order.
words2 <- words %>%
  mutate(V2 = set_tone(V2)) %>%
  unite(thai, V1, V2, sep = " = ") %>%
  rename(english = V3)

# Thai -> English deck: tab-separated, no header row, no row names, no quoting.
words2 %>%
  write.table(
    file = "top_1000_thai_words.tsv",
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )

# English -> Thai deck: same rows with the two columns swapped.
words2 %>%
  select(english, thai) %>%
  write.table(
    file = "top_1000_thai_words_flipped.tsv",
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment