Skip to content

Instantly share code, notes, and snippets.

@LukeB42
Last active December 31, 2015 16:01
Show Gist options
  • Save LukeB42/afa1201f8076314caf8f to your computer and use it in GitHub Desktop.
Save LukeB42/afa1201f8076314caf8f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Use PyMarkovChain to generate Markov-chain text from the content of one or two news articles.
import sys, os
from emissary.models import Article
from pymarkovchain import MarkovChain
# Chain backed by a ".markovdb" file in the user's home directory.
# expanduser() is used instead of os.getenv('HOME') so an unset HOME does not
# raise TypeError on the string concatenation.
mc = MarkovChain(os.path.join(os.path.expanduser('~'), '.markovdb'))


def _usage_exit():
    """Print the command-line usage message and terminate the script."""
    print("Usage: %s <uid> [<uid>] <#lines>" % sys.argv[0])
    raise SystemExit


def _article_content(uid):
    """Return the decoded text content of the article identified by *uid*.

    Prints a message and raises SystemExit when no article matches *uid* or
    when the article reports that it has no content available.
    """
    article = Article.query.filter(Article.uid == uid).first()
    if not article:
        print("Couldn't find an article with that UID.")
        raise SystemExit
    if not article.jsonify()['content_available']:
        print("No content available for %s" % uid)
        raise SystemExit
    # NOTE(review): .decode() assumes 'content' is a byte string (Python 2
    # str) -- confirm against Article.jsonify().
    return article.jsonify(content=True)['content'].decode("utf-8", "ignore")


if __name__ == "__main__":
    if len(sys.argv) < 3:
        _usage_exit()

    # Decide between "<uid> <#lines>" and "<uid> <uid> <#lines>" by argument
    # count. Inspecting argv[2] with isalnum() cannot tell the two forms
    # apart because a plain number such as "5" is alphanumeric as well,
    # which made the single-article form fail with IndexError on argv[3].
    if len(sys.argv) > 3:
        uids, num_lines = sys.argv[1:3], sys.argv[3]
    else:
        uids, num_lines = sys.argv[1:2], sys.argv[2]

    # Validate the line count before touching the database so that a typo
    # produces the usage message rather than a late ValueError traceback.
    try:
        line_count = int(num_lines)
    except ValueError:
        _usage_exit()

    # Articles are loaded in order; the first failure exits immediately.
    content = u"".join(_article_content(uid) for uid in uids)

    mc.generateDatabase(content)
    for _ in range(line_count):
        print(mc.generateString())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment