Last active
December 18, 2016 20:17
-
-
Save temoto/4ce8dddfd74a9fc7983e6922b307e38c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import bisect | |
import itertools | |
import sys | |
# Example session:
#
# % cat wordlist
# a b c
# pig bird
# rig sig
# % cat input
# 2473
# bird vine
# windows
# % ./phoneword.py -words wordlist <input
# 2473 bird
# bird 2473
# vine 8463
# windows 9463697
# Command-line interface: two boolean switches and the mandatory dictionary path.
cmdline = argparse.ArgumentParser()
cmdline.add_argument(
    '-debug',
    action='store_true',
    default=False,
)
cmdline.add_argument(
    '-test',
    action='store_true',
    default=False,
    help='run internal tests',
)
cmdline.add_argument(
    '-words',
    required=True,
    metavar='FILE',
    help='path to words dictionary',
)
# Phone keypad layout: digit -> the Latin and Cyrillic letters assigned to that
# key. Keys '1' and '0' carry no letters.
keymap = {
    '1': '',
    '2': 'abcабвг',
    '3': 'defдеёжз',
    '4': 'ghiийкл',
    '5': 'jklмноп',
    '6': 'mnoрсту',
    '7': 'pqrsфхцч',
    '8': 'tuvшщъы',
    '9': 'wxyzьэюя',
    '0': '',
}

# Reverse lookup: letter -> the digit of the key it sits on.
alpha_num_map = {
    letter: digit
    for digit, letters in keymap.items()
    for letter in letters
}
# Alphabets the tool tells apart; an alphabet's position in the tuple is its
# language id in language_map.
config_languages = (
    'abcdefghijklmnopqrstuvwxyz',
    # Must list every Cyrillic letter that appears in keymap, 'й' included:
    # single_language() skips characters missing from language_map, so an
    # omitted letter lets mixed-alphabet combinations pass as one language.
    'абвгдеёжзийклмнопрстуфхцчшщъыьэюя',
)

# Letter -> index of the alphabet (in config_languages) that contains it.
language_map = {
    letter: lang_id
    for lang_id, alphabet in enumerate(config_languages)
    for letter in alphabet
}
# Dictionary words collected by main(); a set, so repeated words collapse.
words = set()
# The same words in sorted order; main() fills it and words_like() bisects it.
words_sorted = list()
# Switch read by log_debug(); main() turns it on when -debug is given.
debug = False
def log_debug(msg):
    """Write ``msg`` to stderr, prefixed with ``DEBUG ``, when ``debug`` is set.

    Does nothing while the module-level ``debug`` flag is false.
    """
    if not debug:
        return
    sys.stderr.write('DEBUG ' + msg + '\n')
def log(msg):
    """Write ``msg`` followed by a newline to standard error.

    ``msg`` may be any object: it is rendered through ``str.format`` rather
    than concatenated, so passing an exception instance (as ``main`` does with
    the caught ``IOError``) prints its text instead of raising ``TypeError``.
    """
    sys.stderr.write('{}\n'.format(msg))
def single_language(w):
    """Return True when the known letters of ``w`` come from exactly one alphabet.

    ``w`` is any iterable of characters. Characters absent from
    ``language_map`` are ignored; if no character is known, the result is
    False.
    """
    seen_languages = set()
    for ch in w:
        lang_id = language_map.get(ch)
        if lang_id is not None:
            seen_languages.add(lang_id)
    return len(seen_languages) == 1
def words_like(win):
    """Yield every word of ``words_sorted`` that starts with the prefix ``win``.

    ``words_sorted`` must be in ascending order. In a sorted list all strings
    sharing a prefix form one contiguous run that begins at the first element
    >= the prefix, so the run is located with one bisect and then walked until
    the prefix stops matching.

    An upper bound built from a sentinel character is deliberately avoided:
    a sentinel such as 'Ω' (U+03A9) compares lower than Cyrillic letters
    (U+0430 and above), which cuts off Cyrillic continuations of the prefix.
    """
    i1 = bisect.bisect_left(words_sorted, win)
    i2 = i1  # exclusive end of the matching run
    total = len(words_sorted)
    while i2 < total and words_sorted[i2].startswith(win):
        i2 += 1
    ws = words_sorted[i1:i2]
    log_debug('like({win}) i1={i1} i2={i2} result={ws}'.format(
        win=win, i1=i1, i2=i2, ws=ws))
    yield from ws
def translate(w):
    """Translate one token between keypad digits and words.

    * If ``w`` consists only of keypad digits (keys of ``keymap``), return a
      tuple of the dictionary words that begin with a single-alphabet letter
      combination those digits can spell (looked up via ``words_like``).
    * Otherwise treat ``w`` as a word and return a 1-tuple holding its digit
      string; characters with no entry in ``alpha_num_map`` are dropped.

    Returns:
        tuple of str.
    """
    log_debug('translate({0})'.format(w))
    # Test membership in keymap rather than str.isdigit(): isdigit() is also
    # true for characters such as '²' or Arabic-Indic digits, which have no
    # key and would raise KeyError in the lookup below.
    if w and all(d in keymap for d in w):
        candidates = (
            ''.join(row)
            for row in itertools.product(*[keymap[d] for d in w])
            if single_language(row)
        )
        if debug:
            # Materialise only in debug mode so the candidates can be printed.
            candidates = tuple(candidates)
            log_debug('translate({0}) g={1}'.format(w, candidates))
        return tuple(
            match
            for prefix in candidates
            for match in words_like(prefix)
        )
    # alpha_num_map holds lower-case letters only; lower-case the word so that
    # capital letters map to their digit instead of vanishing from the number.
    return (''.join(alpha_num_map.get(l, '') for l in w.lower()),)
def run_tests():
    """Run the built-in self-checks; raise AssertionError on the first failure.

    The second and third checks depend on the dictionary currently loaded into
    ``words_sorted``: 'pig' must be present, and the words beginning with any
    letter on key 2 must be exactly 'a', 'b' and 'c'.
    NOTE(review): a dictionary that also contains e.g. 'bird' would fail the
    second check — confirm which word list these tests are meant to run with.
    """
    digits_for_bird = translate('bird')
    assert '2473' in digits_for_bird

    words_for_two = set(translate('2'))
    assert set('abc') == words_for_two

    words_for_744 = translate('744')
    assert 'pig' in words_for_744
def main():
    """Program entry point.

    Parses the command line, loads the whitespace-separated words of the
    ``-words`` file into ``words`` / ``words_sorted``, then either runs the
    self-tests (``-test``) or translates every whitespace-separated token read
    from stdin, printing one ``token<TAB>translation`` line per result.

    Returns:
        1 if the words file cannot be read; the result of ``run_tests()``
        when ``-test`` is given; otherwise None.
    """
    global debug

    flags = cmdline.parse_args()
    if flags.debug:
        debug = True

    try:
        with open(flags.words, 'rt') as f:
            for line in f:
                words.update(line.split())
    except IOError as e:
        log('error reading words from {}'.format(flags.words))
        # Hand log() text, not the exception object: it is written for str
        # messages, and an exception instance would make this error path
        # itself fail with TypeError.
        log(str(e))
        return 1
    words_sorted.extend(sorted(words))

    if flags.test:
        return run_tests()

    for line in sys.stdin:
        for word in line.split():
            for t in translate(word):
                print('{0}\t{1}'.format(word, t))
# Script entry: exit with main()'s status, or with 1 when interrupted by Ctrl-C.
if __name__ == '__main__':
    try:
        exit_status = main()
    except KeyboardInterrupt:
        exit_status = 1
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment