Last active
August 31, 2018 21:17
-
-
Save act65/70988c5843f71b675abb570c311f09eb to your computer and use it in GitHub Desktop.
Make a word cloud from a BibTeX (.bib) file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from PIL import Image, ImageColor | |
import numpy as np | |
from wordcloud import WordCloud, STOPWORDS | |
import argparse | |
def argumentparser(argv=None):
    """Parse the command-line options of the word-cloud script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behaviour callers that pass nothing have always had.

    Returns:
        argparse.Namespace with the attributes ``bib_dir``, ``save_dir``,
        ``mask_dir`` and ``name``.
    """
    parser = argparse.ArgumentParser(
        description='Make a word cloud of a bib file')
    parser.add_argument('--bib_dir', type=str,
                        help='location of the bib file')
    parser.add_argument('--save_dir', type=str, default='/tmp/',
                        help='directory for the cached text (raw.txt) '
                             'and the output image')
    parser.add_argument('--mask_dir', type=str, default=None,
                        help='location of mask')
    # The help text here used to be a copy of the --mask_dir help.
    parser.add_argument('--name', type=str, default='img',
                        help='file name (without extension) of the saved image')
    return parser.parse_args(argv)
def read_file(fname):
    """Return the entire contents of the text file at ``fname``.

    The file is opened in text mode with the platform's default encoding
    and closed again before returning.
    """
    with open(fname) as source:
        return source.read()
def parse_line(l):
    """Extract the value part of a ``key = value`` bib line.

    The line is split at the first ``=`` only, so values that themselves
    contain an equals sign are kept intact, and lines written without
    spaces around the sign (``title={{...}}``) no longer raise IndexError.
    Lines without any ``=`` are passed through unchanged.

    Args:
        l: A single line of text from a bib entry.

    Returns:
        The value (or the whole line when there is no ``=``) with every
        ``{{`` and ``}}`` removed and a trailing newline appended.
    """
    _, separator, value = l.partition('=')
    if separator:
        # Drop the whitespace that followed the '=' so that 'key = value'
        # yields the same text as it did when splitting on ' = '.
        s = value.lstrip()
    else:
        s = l
    return s.replace('{{', '').replace('}}', '') + '\n'
def parse_bib_data(data):
    """Gather the title and abstract lines of a bib document into one string.

    The input is split on ``@`` and each piece is then split on newlines.
    Every line containing the substring ``title`` and every line containing
    the substring ``abstract`` is run through ``parse_line`` and appended
    to the result. The match is a plain substring test, so a line that
    contains both words is appended twice (title pass first).

    Args:
        data: Full text of a bib file.

    Returns:
        The concatenation of the parsed lines, in file order.
    """
    wanted = ('title', 'abstract')
    collected = []
    for entry in data.split('@'):
        for row in entry.split('\n'):
            collected.extend(parse_line(row) for key in wanted if key in row)
    return ''.join(collected)
def sigmoid(x, temp=1.0):
    """Logistic function of ``x / temp``.

    Args:
        x: Scalar or numpy array.
        temp: Divisor applied to ``x`` before squashing (default 1.0).

    Returns:
        ``1 / (1 + exp(-x / temp))``, element-wise for array input.
    """
    scaled = x / temp
    denominator = 1 + np.exp(-scaled)
    return 1 / denominator
class CPPN:
    """Randomly initialised two-layer network with sigmoid activations.

    Parameters are drawn once from a standard normal distribution when the
    object is created; the second weight matrix is additionally scaled by 3.
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        """Draw the parameters for the given layer sizes.

        The draws happen in the order weights1, bias1, weights2, bias2, so
        results are reproducible for a fixed ``np.random`` seed.
        """
        draw = np.random.standard_normal
        self.weights1 = draw([n_inputs, n_hidden])
        self.bias1 = draw([n_hidden])
        self.weights2 = 3 * draw([n_hidden, n_outputs])
        self.bias2 = draw([n_outputs])

    def __call__(self, x):
        """Run ``x`` through both layers and return the sigmoid output."""
        hidden = sigmoid(np.dot(x, self.weights1) + self.bias1)
        return sigmoid(np.dot(hidden, self.weights2) + self.bias2)
class ColorFunc():
    """Callable that picks a word colour from its layout properties.

    A ``CPPN(4, 64, 3)`` with random weights maps four features (the two
    position coordinates, the orientation and the font size) to three
    values in [0, 1], which are scaled to 0-255 integers.
    """

    def __init__(self):
        self.cppn = CPPN(4, 64, 3)

    def __call__(self, word, font_size, position, orientation, font_path, random_state):
        """Return an ``(r, g, b)`` tuple of ints for one word.

        Args:
            word: Unused (see TODO below).
            font_size: Font size of the word; divided by 100 as a feature.
            position: Two coordinates; each is divided by 1000 as a feature.
            orientation: Orientation value, or ``None`` which is treated as 0.
            font_path: Unused.
            random_state: Unused.

        Returns:
            A 3-tuple of Python ints obtained by truncating ``255 * output``.
        """
        # TODO word vectors
        orientation = 0 if orientation is None else orientation
        features = [p/1000 for p in position] + [orientation, font_size/100]
        x = np.array(features).reshape([1, 4])
        y = self.cppn(x)
        # The network output has shape (1, 3). The previous code appended
        # ``[1]`` to the *outer* list and then returned only row 0, so that
        # extra element never reached the caller; it is dropped here and
        # the result stays a plain RGB triple.
        rgb = (y*255).astype(np.int32).tolist()[0]
        return tuple(rgb)
if __name__ == '__main__':
    # Script entry point: build (or reuse) the plain-text corpus extracted
    # from the bib file, render it as a word cloud and save it as a PNG.
    args = argumentparser()

    # Make sure the output directory exists before anything is written.
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)
    raw_path = os.path.join(args.save_dir, 'raw.txt')

    if os.path.exists(raw_path):
        # A previous run already extracted the text; reuse it.
        text = read_file(raw_path)
    else:
        if args.bib_dir is None:
            # Without this guard open(None) fails with an opaque TypeError.
            raise SystemExit(
                '--bib_dir is required when %s does not exist' % raw_path)
        text = parse_bib_data(read_file(args.bib_dir))
        with open(raw_path, 'w') as f:
            f.write(text)

    # Always bind ``mask`` so the WordCloud call does not depend on a
    # conditionally defined name.
    mask = None
    if args.mask_dir is not None:
        mask = np.array(Image.open(args.mask_dir))

    wordcloud = WordCloud(max_font_size=120,
                          min_font_size=10,
                          mode='RGBA',
                          background_color=None,
                          mask=mask,
                          color_func=ColorFunc(),
                          stopwords=set(STOPWORDS),
                          max_words=5000).generate(text)
    image = wordcloud.to_image()
    image.save(os.path.join(args.save_dir, args.name + '.png'), 'PNG')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment