Skip to content

Instantly share code, notes, and snippets.

@act65
Last active August 31, 2018 21:17
Show Gist options
  • Save act65/70988c5843f71b675abb570c311f09eb to your computer and use it in GitHub Desktop.
Save act65/70988c5843f71b675abb570c311f09eb to your computer and use it in GitHub Desktop.
Make a word cloud from a BibTeX (.bib) file
import os
from PIL import Image, ImageColor
import numpy as np
from wordcloud import WordCloud, STOPWORDS
import argparse
def argumentparser():
    """Parse the command-line options of the word-cloud script.

    Options (all are optional strings):
        --bib_dir   path of the bib file to read (default: None)
        --save_dir  directory the outputs are written to (default: '/tmp/')
        --mask_dir  path of a mask image (default: None, i.e. no mask)
        --name      name of the output image, without extension
                    (default: 'img')

    Returns:
        argparse.Namespace with the attributes ``bib_dir``, ``save_dir``,
        ``mask_dir`` and ``name``.
    """
    parser = argparse.ArgumentParser(description='Make a word cloud of a bib file')
    parser.add_argument('--bib_dir', type=str,
                        help='location of the bib file')
    parser.add_argument('--save_dir', type=str, default='/tmp/',
                        help='location to save text')
    parser.add_argument('--mask_dir', type=str, default=None,
                        help='location of mask')
    # The help text used to be a copy of the --mask_dir one.
    parser.add_argument('--name', type=str, default='img',
                        help='name of the output image (without extension)')
    return parser.parse_args()
def read_file(fname):
    """Return the whole content of the text file `fname` as a single string.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(fname) as handle:
        return handle.read()
def parse_line(l):
    """Extract the value part of a ``key = value`` line from a bib entry.

    Args:
        l: one line of text.

    Returns:
        The text after the first ``' = '`` (or, failing that, after the
        first ``'='``), or the whole line when it contains no ``'='``.
        Every ``'{{'`` and ``'}}'`` is removed and a newline is appended.
        Single braces and trailing commas are left in place.
    """
    if ' = ' in l:
        # partition keeps everything after the first separator, so a value
        # that itself contains ' = ' is no longer cut short.
        s = l.partition(' = ')[2]
    elif '=' in l:
        # 'key=value' without surrounding spaces; indexing the result of
        # split(' = ') raised IndexError here.
        s = l.partition('=')[2]
    else:
        s = l
    return s.replace('{{', '').replace('}}', '') + '\n'
def parse_bib_data(data):
    """Collect the title and abstract text found in the content of a bib file.

    `data` is split on '@' and each piece is split into lines. Every line
    that contains the substring 'title' is passed through `parse_line`, and
    likewise every line that contains 'abstract'; a line containing both
    substrings is therefore added twice.

    Returns:
        The concatenation of all collected `parse_line` results ('' when
        nothing matched).
    """
    collected = []
    for entry in data.split('@'):
        for row in entry.split('\n'):
            # Plain substring tests, checked in this order for every row.
            for keyword in ('title', 'abstract'):
                if keyword in row:
                    collected.append(parse_line(row))
    return ''.join(collected)
def sigmoid(x, temp=1.0):
    """Logistic function with a temperature: 1 / (1 + exp(-x / temp)).

    `x` may be a scalar or a NumPy array; the function is applied elementwise.
    """
    decay = np.exp(-x / temp)
    return 1 / (1 + decay)
class CPPN:
    """Randomly initialised network with one hidden layer and sigmoid units.

    All parameters are drawn from a standard normal distribution through the
    global NumPy random state; the hidden-to-output weights are additionally
    scaled by 3.
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        # Draw order matters for reproducibility under a fixed seed:
        # weights1, bias1, weights2, bias2.
        draw = np.random.standard_normal
        self.weights1 = draw([n_inputs, n_hidden])
        self.bias1 = draw([n_hidden])
        self.weights2 = 3 * draw([n_hidden, n_outputs])
        self.bias2 = draw([n_outputs])

    def __call__(self, x):
        """Map `x` through both layers; every output lies between 0 and 1."""
        hidden = sigmoid(np.dot(x, self.weights1) + self.bias1)
        return sigmoid(np.dot(hidden, self.weights2) + self.bias2)
class ColorFunc:
    """Colour callback that feeds a word's layout properties through a CPPN.

    An instance is passed to WordCloud as `color_func`. The network is
    randomly initialised, so each instance yields a different colouring.
    """

    def __init__(self):
        # 4 input features -> 64 hidden units -> 3 output channels.
        self.cppn = CPPN(4, 64, 3)

    def __call__(self, word, font_size, position, orientation, font_path, random_state):
        """Return a tuple of three ints in 0..255 for one word.

        Only `position`, `orientation` and `font_size` are used; `word`,
        `font_path` and `random_state` are ignored.
        """
        # TODO word vectors
        if orientation is None:
            orientation = 0
        # Feature row: scaled position components, orientation, scaled size.
        features = [coord / 1000 for coord in position]
        features.append(orientation)
        features.append(font_size / 100)
        net_input = np.array(features).reshape([1, 4])
        channels = self.cppn(net_input) * 255
        # Only the first (and only) output row is returned.
        return tuple(channels.astype(np.int32)[0].tolist())
if __name__ == '__main__':
    args = argumentparser()

    # The extracted text is cached in <save_dir>/raw.txt. When that file
    # already exists it is reused and the bib file is not read at all.
    raw_path = os.path.join(args.save_dir, 'raw.txt')
    if os.path.exists(raw_path):
        text = read_file(raw_path)
    else:
        if args.bib_dir is None:
            # Without this guard open(None) fails with an opaque TypeError.
            raise SystemExit('--bib_dir is required when ' + raw_path + ' does not exist')
        text = parse_bib_data(read_file(args.bib_dir))
        with open(raw_path, 'w') as f:
            f.write(text)

    # The mask is optional; None tells WordCloud to use no mask.
    mask = None
    if args.mask_dir is not None:
        mask = np.array(Image.open(args.mask_dir))

    wordcloud = WordCloud(max_font_size=120,
                          min_font_size=10,
                          # width=2048,
                          # height=1024,
                          mode='RGBA',
                          background_color=None,
                          mask=mask,
                          color_func=ColorFunc(),
                          stopwords=set(STOPWORDS),
                          max_words=5000).generate(text)

    # The image is written to <save_dir>/<name>.png.
    image = wordcloud.to_image()
    image.save(os.path.join(args.save_dir, args.name + '.png'), 'PNG')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment