Created
February 8, 2014 16:01
-
-
Save humbhenri/8885895 to your computer and use it in GitHub Desktop.
Markov Chain implementation in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Markov Chain Algorithm (The Practice of Programming pg 62) | |
# set w1 and w2 to be the first two words in the text | |
# print w1 and w2 | |
# loop: | |
# randomly choose w3, one of successors of prefix w1 and w2 | |
# print w3 | |
# replace w1 and w2 by w2 and w3 | |
# repeat loop | |
import random | |
import sys | |
NONWORD='NONONON' | |
NPREF=2 | |
class Markov(object): | |
def __init__(self): | |
self.init_preffix() | |
self.states = {} | |
def init_preffix(self): | |
self.preffix = tuple([NONWORD for i in xrange(0, NPREF)]) | |
def words(self, f): | |
for line in f: | |
for word in line.split(): | |
yield word | |
def update_prefix(self, suffix): | |
self.preffix = self.preffix[1:] + (suffix,) | |
def add(self, suffix): | |
if self.states.get(self.preffix) is None: | |
self.states[self.preffix] = [] | |
self.states[self.preffix].append(suffix) | |
self.update_prefix(suffix) | |
def build(self, f): | |
for suffix in self.words(f): | |
self.add(suffix) | |
def generate(self, nwords): | |
self.init_preffix() | |
for i in xrange(0, nwords): | |
suffix_list = self.states.get(self.preffix) | |
if suffix_list is None : | |
break | |
next_word = random.choice(suffix_list) | |
if next_word == NONWORD: | |
break | |
print next_word | |
self.update_prefix(next_word) | |
if __name__ == '__main__': | |
markov = Markov() | |
f = open(sys.argv[1], 'r') | |
markov.build(f) | |
markov.generate(10000) | |
f.close() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment