Created
January 6, 2022 19:50
-
-
Save thouis/721e970685827013f17220f7f8b70a73 to your computer and use it in GitHub Desktop.
Wordle Solver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from urllib import request | |
import numpy as np | |
import pandas as pd | |
# Fetch Peter Norvig's unigram frequency list (one "word<TAB>count" entry per
# line, ordered by usage count) unless a local copy is already on disk.
if not os.path.isfile('count_1w.txt'):
    request.urlretrieve("https://norvig.com/ngrams/count_1w.txt", "count_1w.txt")

# Wordle words are five letters long: keep only the five-letter entries and
# cap the vocabulary at the first 5,000 of them (i.e. the most frequent).
with open('count_1w.txt') as file:
    rows = (line.strip().split('\t') for line in file)
    words = [w for w, _ in rows if len(w) == 5][:5_000]

# Alternative source: the system dictionary (lower-case entries only).
# words = [w.strip() for w in open("/usr/share/dict/words", "r") if len(w) == 6 and w.lower() == w]

# Hold the vocabulary in a DataFrame indexed by the word itself, with the same
# strings duplicated in a 'word' column so they can be used as ordinary data.
words = pd.DataFrame({'word': words}, index=words)
print(words.shape[0], "words loaded")
def wordle_pattern(guess, target):
    """Return the Wordle feedback string for *guess* scored against *target*.

    The result has one character per letter of ``guess``:

    * ``'g'`` - the letter is in the same position in ``target``;
    * ``'y'`` - the letter occurs elsewhere in ``target`` and that occurrence
      has not already been used up by an exact match or an earlier ``'y'``;
    * ``'.'`` - anything else.

    Words of any length are accepted (the buffer is sized from ``guess``
    rather than fixed at five cells). Letters are compared pairwise with
    ``zip``, so if the two words differ in length only the common prefix is
    compared and any extra positions of ``guess`` stay ``'.'``.

    >>> wordle_pattern('naval', 'banal')
    'yg.gg'
    >>> wordle_pattern('banana', 'cabana')
    'yg.ggg'
    """
    out = ['.'] * len(guess)
    # Target letters that were NOT matched exactly; only these can still
    # justify a "present but misplaced" mark.
    counts = {}
    # identify matches
    for i, (a, b) in enumerate(zip(guess, target)):
        if a == b:
            out[i] = 'g'
        else:
            counts[b] = counts.get(b, 0) + 1
    # identify present but wrong place, consuming one unmatched target letter
    # per mark so repeated guess letters are not over-reported
    for i, (a, b) in enumerate(zip(guess, target)):
        if a != b and counts.get(a, 0) > 0:
            out[i] = 'y'
            counts[a] -= 1
    return "".join(out)
# Number of randomly drawn targets used to score the candidate guesses.
niters = 100

# Every word starts at log(vocabulary size): the score of a guess that tells
# us nothing (all words left in one group).
words['score'] = np.log(len(words))

# by hand filtering as proof of concept - target from 2022-01-06
#
# words['pattern'] = [wordle_pattern('aisle', target) for target in words.word]
# words = words[words.pattern == 'y..y.']
#
# words['pattern'] = [wordle_pattern('ultra', target) for target in words.word]
# words = words[words.pattern == '.y..y']
#
# words['pattern'] = [wordle_pattern('waldo', target) for target in words.word]
# words = words[words.pattern == '.gy..']
#
# words['pattern'] = [wordle_pattern('naval', target) for target in words.word]
# words = words[words.pattern == 'yg.gg']
#
# print(len(words), "words filtered")

for iterx in range(niters):
    # Draw one word uniformly at random to play the role of the hidden answer.
    target = words['word'].sample(n=1).iloc[0]

    # Feedback each vocabulary word would receive if guessed against it.
    words['pattern'] = [
        wordle_pattern(candidate, target) for candidate in words['word']
    ]

    # Log of the number of words sharing each feedback pattern.
    log_group_size = np.log(words['pattern'].value_counts())

    # Accumulate, per word, the log size of the group its pattern falls in.
    words['score'] = words['score'] + words['pattern'].map(log_group_size)

# Lowest accumulated score first.
print(words.sort_values(by="score").head(20))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment