Last active
September 9, 2020 14:01
-
-
Save ymotongpoo/5d87eafa4ff25ea0b797a173b00b430c to your computer and use it in GitHub Desktop.
A script to generate lesson files for gtypist. Currently generates eucalyn-layout lessons.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import itertools | |
import collections | |
import textwrap | |
import random | |
import shelve | |
import contextlib | |
# Lesson plan for the Eucalyn layout, in teaching order.  Each entry is a
# 4-tuple ``(letters, extraletters, description, lessontype)``:
#   letters      - characters the lesson focuses on
#   extraletters - characters that may also appear in the drill material
#   description  - human-readable name used in the lesson title
#   lessontype   - 'ngrams' or 'words'
_EUCALYN_HOME_ROW = 'aoeitksn'

eucalyn_lessons = [
    # home row, finger by finger
    ('it', '', 'home row-first finger', 'ngrams'),
    ('ek', '', 'home row-second finger', 'ngrams'),
    ('ek', 'it', 'home row-two fingers', 'words'),
    ('os', '', 'home row-third finger', 'ngrams'),
    ('os', 'itek', 'home row-three fingers', 'words'),
    ('an', '', 'home row-fourth finger', 'ngrams'),
    # home row, hand by hand and combined
    ('aoei', '', 'home row-left hand', 'words'),
    ('tksn', '', 'home row-right hand', 'words'),
    (_EUCALYN_HOME_ROW, '', 'home row-both hands', 'ngrams'),
    (_EUCALYN_HOME_ROW, '', 'home row-both hands', 'words'),
    # each new letter group: alone, then mixed with the home row
    ('lr', '', 'lr', 'ngrams'),
    ('lr', _EUCALYN_HOME_ROW, 'homerow + lr', 'ngrams'),
    ('lr', _EUCALYN_HOME_ROW, 'homerow + lr', 'words'),
    ('cudp', '', 'cudp', 'ngrams'),
    ('cudp', _EUCALYN_HOME_ROW, 'cudp + homerow', 'ngrams'),
    ('cudp', _EUCALYN_HOME_ROW, 'cudp + homerow', 'words'),
    ('gy', '', 'gy', 'ngrams'),
    ('gy', _EUCALYN_HOME_ROW, 'gy + homerow', 'ngrams'),
    ('gy', _EUCALYN_HOME_ROW, 'gy + homerow', 'words'),
    ('mfw', '', 'mfw', 'ngrams'),
    ('mfw', _EUCALYN_HOME_ROW, 'mfw + homerow', 'ngrams'),
    ('mfw', _EUCALYN_HOME_ROW, 'mfw + homerow', 'words'),
    ('kb', '', 'kb', 'ngrams'),
    ('kb', _EUCALYN_HOME_ROW, 'kb + homerow', 'ngrams'),
    ('kb', _EUCALYN_HOME_ROW, 'kb + homerow', 'words'),
    # cumulative review: each group together with everything before it
    ('cudp', 'lr' + _EUCALYN_HOME_ROW, 'cumulative including cudp', 'words'),
    ('gy', 'cudplr' + _EUCALYN_HOME_ROW, 'cumulative including gy', 'words'),
    ('mfw', 'cudplrgy' + _EUCALYN_HOME_ROW, 'cumulative including mfw', 'words'),
    ('kb', 'mfwcudplrgy' + _EUCALYN_HOME_ROW, 'cumulative including kb', 'words'),
]
# Lesson plan for the Workman layout, in teaching order.  Each entry is a
# 4-tuple ``(letters, extraletters, description, lessontype)``:
#   letters      - characters the lesson focuses on
#   extraletters - characters that may also appear in the drill material
#   description  - human-readable name used in the lesson title
#   lessontype   - 'ngrams' or 'words'
_WORKMAN_HOME_ROW = 'ashtneoi'

workman_lessons = [
    # home row, finger by finger
    ('tn', '', 'home row-first finger', 'ngrams'),
    ('he', '', 'home row-second finger', 'ngrams'),
    ('he', 'tn', 'home row-two fingers', 'words'),
    ('so', '', 'home row-third finger', 'ngrams'),
    ('so', 'then', 'home row-three fingers', 'words'),
    ('ai', '', 'home row-fourth finger', 'ngrams'),
    # home row, hand by hand and combined
    ('asht', '', 'home row-left hand', 'words'),
    ('noei', '', 'home row-right hand', 'words'),
    (_WORKMAN_HOME_ROW, '', 'home row-both hands', 'ngrams'),
    (_WORKMAN_HOME_ROW, '', 'home row-both hands', 'words'),
    # each new letter group: alone, then mixed with the home row
    ('lr', '', 'lr', 'ngrams'),
    ('lr', _WORKMAN_HOME_ROW, 'homerow + lr', 'ngrams'),
    ('lr', _WORKMAN_HOME_ROW, 'homerow + lr', 'words'),
    ('cudp', '', 'cudp', 'ngrams'),
    ('cudp', _WORKMAN_HOME_ROW, 'cudp + homerow', 'ngrams'),
    ('cudp', _WORKMAN_HOME_ROW, 'cudp + homerow', 'words'),
    ('gy', '', 'gy', 'ngrams'),
    ('gy', _WORKMAN_HOME_ROW, 'gy + homerow', 'ngrams'),
    ('gy', _WORKMAN_HOME_ROW, 'gy + homerow', 'words'),
    ('mfw', '', 'mfw', 'ngrams'),
    ('mfw', _WORKMAN_HOME_ROW, 'mfw + homerow', 'ngrams'),
    ('mfw', _WORKMAN_HOME_ROW, 'mfw + homerow', 'words'),
    ('kb', '', 'kb', 'ngrams'),
    ('kb', _WORKMAN_HOME_ROW, 'kb + homerow', 'ngrams'),
    ('kb', _WORKMAN_HOME_ROW, 'kb + homerow', 'words'),
    # cumulative review: each group together with everything before it
    ('cudp', 'lr' + _WORKMAN_HOME_ROW, 'cumulative including cudp', 'words'),
    ('gy', 'cudplr' + _WORKMAN_HOME_ROW, 'cumulative including gy', 'words'),
    ('mfw', 'cudplrgy' + _WORKMAN_HOME_ROW, 'cumulative including mfw', 'words'),
    ('kb', 'mfwcudplrgy' + _WORKMAN_HOME_ROW, 'cumulative including kb', 'words'),
]
""" | |
Given some letters to drill on:
- drill on ngrams
- drill on words
- each item 3x, then all items in random order 3x
- if there are too many words, take a random sample.
""" | |
def ngrams(letters, extraletters, dictionary):
    """Yield ``(count, ngram)`` pairs for letter runs found in *dictionary*.

    An "ngram" here is a maximal run of three or more consecutive
    characters, all drawn from ``letters + extraletters``, inside a word.
    Runs are tallied across every word, and only runs that contain at
    least one character from *letters* are yielded, in first-seen order.

    Args:
        letters: characters being drilled; every yielded run contains at
            least one of them.  If empty, nothing is yielded.
        extraletters: additional characters allowed inside a run.
        dictionary: iterable of words to scan.

    Yields:
        ``(count, ngram)`` tuples, where *count* is how many times the
        run occurred across the whole dictionary.
    """
    if not letters:
        # Without drill letters no run can qualify, and an empty
        # character class ("[]") would not even compile.
        return
    # Escape the letters so characters such as "^", "-" or "]" are matched
    # literally instead of changing the meaning of the character class.
    possible_re = re.compile(r'[%s]{3,}' % re.escape(letters + extraletters))
    valid_re = re.compile(r'[%s]' % re.escape(letters))
    memo = collections.Counter()
    for word in dictionary:
        memo.update(possible_re.findall(word))
    for ngram, count in memo.items():
        if valid_re.search(ngram):
            yield (count, ngram)
def find_words(letters, extraletters, dictionary, min_length=2):
    """Return the dictionary words that can be spelled with the given letters.

    A word qualifies when it is at least *min_length* characters long,
    consists solely of characters from ``letters + extraletters``, and
    contains at least one character from *letters*.

    Args:
        letters: characters being drilled; each result contains at least
            one of them.  If empty, no word can qualify.
        extraletters: additional characters a word is allowed to use.
        dictionary: iterable of candidate words.
        min_length: minimum number of characters a word must have.

    Returns:
        A lazy iterator over the qualifying words, in dictionary order.
    """
    if not letters:
        # Nothing can contain "one of no letters"; also avoids compiling
        # an invalid empty character class.
        return iter(())
    # Escape the letters so regex metacharacters ("^", "-", "]", ...) are
    # treated as literal keys rather than altering the character class.
    allowed = re.escape(letters + extraletters)
    word_re = re.compile(r'^[%s]{%s,}$' % (allowed, min_length))
    valid_re = re.compile(r'[%s]' % re.escape(letters))
    return (word for word in dictionary
            if word_re.match(word) and valid_re.search(word))
def wordlesson(words):
    """Produce the drill lines for one lesson built from *words*.

    First comes one line per word, with that word written three times.
    After that come three more lines, each holding every word once in a
    freshly shuffled order.
    """
    pool = list(words)
    repeats = 3
    for item in pool:
        yield " ".join((item,) * repeats)
    rounds_left = repeats
    while rounds_left:
        # Shuffle in place so each round starts from the previous order.
        random.shuffle(pool)
        yield " ".join(pool)
        rounds_left -= 1
def gtypist_lesson(label, words, title=None, instructions=None):
    """Yield the text of one lesson in gtypist script format.

    The output is a ``*:`` line carrying *label*, then an optional ``B:``
    section for *title*, an optional ``T:`` section for *instructions*,
    and finally a ``D:`` section containing *words* joined by spaces.
    Each section is word-wrapped, with continuation lines prefixed by
    `` :``.
    """
    def section(prefix, text):
        # One wrapped section: first line gets the command prefix, the
        # remaining lines get the continuation prefix.
        return textwrap.fill(text, initial_indent=prefix, subsequent_indent=" :")

    yield "*:%s" % label
    for prefix, text in (("B:", title), ("T:", instructions)):
        if text:
            yield section(prefix, text)
    yield section("D:", " ".join(words))
def menu(labels):
    """Yield the lines of the gtypist menu section.

    Three fixed header lines come first (the ``MENU`` label, the banner
    and the menu heading), followed by one entry line for each
    ``(label, title)`` pair in *labels*.
    """
    header = (
        "*:MENU",
        "B:Eucalyn Lessons",
        'M: UP=_EXIT "These are lessons for learning the Eucalyn keyboard layout."',
    )
    for line in header:
        yield line
    for entry in labels:
        label, title = entry
        yield ' :%s "%s"' % (label, title)
def full_lesson(dictionary, lessons=None):
    """Generate every gtypist lesson, followed by the menu linking them.

    Args:
        dictionary: collection of candidate words; it is scanned once per
            lesson-plan entry, so it must be re-iterable (e.g. a set).
        lessons: optional lesson plan, an iterable of
            ``(letters, extraletters, description, lessontype)`` tuples
            where *lessontype* is ``'ngrams'`` or ``'words'``.  Defaults
            to ``eucalyn_lessons``.  Note that the text emitted by
            ``menu()`` mentions Eucalyn regardless of the plan used.

    Yields:
        One iterable of output lines per generated lesson, and finally
        the iterable of menu lines.

    Raises:
        ValueError: if an entry has an unknown *lessontype*.
    """
    if lessons is None:
        lessons = eucalyn_lessons
    wordsused = set()
    labels = []
    for n, (letters, extraletters, description, lessontype) in enumerate(lessons):
        if lessontype == 'ngrams':
            # omit 0-frequency ngrams
            found = set(ng for ng in ngrams(letters, extraletters, dictionary)
                        if ng[0])
            # take the 10 most-frequent ngrams ...
            lesson_words = set(w for f, w in sorted(found, reverse=True)[:10])
            # ... and always drill the bare letters themselves as well
            lesson_words.update(letters)
            lesson_words = wordlesson(lesson_words)
            instructions = (
                "a drill on word pieces including the letters %s. (%d found)"
                % (", ".join(letters), len(found)))
            title = "%s %s drill" % (description, lessontype)
            label = "lesson_%d" % n
            labels.append((label, title))
            yield gtypist_lesson(
                label,
                lesson_words,
                title=title,
                instructions=instructions)
        elif lessontype == 'words':
            words = set(find_words(letters, extraletters, dictionary))
            # don't use words from previous lessons
            words -= wordsused
            wordsused |= words
            # if we have a lot of words, make a few lessons (at most 3, of
            # up to 10 words each).  Floor division keeps the count an int;
            # true division would hand range() a float on Python 3.
            for lesson_number in range(min((len(words) - 1) // 10 + 1, 3)):
                if len(words) >= 10:
                    # random.sample() needs a sequence (sets are rejected on
                    # Python 3.11+); sorting also makes the pick
                    # reproducible for a given random seed.
                    lesson_words = set(random.sample(sorted(words), 10))
                else:
                    lesson_words = set(words)
                words -= lesson_words
                lesson_words = wordlesson(lesson_words)
                # Report the 1-based set number so it agrees with the
                # "#N" shown in the title.
                instructions = (
                    "a drill on words that include the letters %s. (Set %d) "
                    "(%d remaining in dictionary)"
                    % (", ".join(letters), lesson_number + 1, len(words)))
                title = "%s %s drill #%d" % (description, lessontype, lesson_number + 1)
                label = "lesson_%d_%d" % (n, lesson_number)
                labels.append((label, title))
                yield gtypist_lesson(
                    label,
                    lesson_words,
                    title=title,
                    instructions=instructions)
        else:
            raise ValueError("unknown lesson type", lessontype)
    yield menu(labels)
def main(argv0, dictionary_filename='/usr/share/dict/words'):
    """Print a complete gtypist lesson file to standard output.

    Args:
        argv0: program name; unused, accepted so the function can be
            called as ``main(*sys.argv)``.
        dictionary_filename: path to a word list with one word per line.
            Defaults to ``/usr/share/dict/words``; a larger list such as
            ``/usr/share/dict/american-english-huge`` may be given instead.
    """
    # Undecodable bytes are replaced rather than raising: such words can
    # never match the plain-letter lesson patterns anyway.
    with open(dictionary_filename, encoding='utf-8', errors='replace') as wordfile:
        dictionary = set(word.strip() for word in wordfile)
    # Start the script by jumping to the menu, which is emitted last.
    print("G:MENU")
    for lesson in full_lesson(dictionary):
        print("\n".join(lesson))
        print()
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments to main() and
    # exit with whatever it returns.
    import sys

    sys.exit(main(*sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment