As configured in my dotfiles.
start new:
tmux
start new with session name:
import re, collections | |
def get_stats(vocab):
    """Count how often each adjacent symbol pair occurs across a vocabulary.

    Args:
        vocab: Mapping from a word, written as whitespace-separated symbols
            (e.g. ``"l o w"``), to that word's frequency.

    Returns:
        A ``collections.defaultdict(int)`` keyed by ``(left, right)`` symbol
        tuples; each value is the summed frequency of the words in which
        that pair appears side by side.
    """
    pairs = collections.defaultdict(int)
    for word, freq in vocab.items():
        symbols = word.split()
        # Pair every symbol with its successor; a word with fewer than two
        # symbols yields no pairs and therefore contributes nothing.
        for left, right in zip(symbols, symbols[1:]):
            pairs[left, right] += freq
    return pairs
from graphviz import Digraph | |
import torch | |
from torch.autograd import Variable, Function | |
def iter_graph(root, callback): | |
queue = [root] | |
seen = set() | |
while queue: | |
fn = queue.pop() | |
if fn in seen: |
As configured in my dotfiles.
start new:
tmux
start new with session name:
import torch | |
import torch.nn as nn | |
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence | |
# Example strings of differing lengths.
seqs = ['gigantic_string', 'tiny_str', 'medium_str']

# make <pad> idx 0: the padding token is placed first, followed by every
# distinct character of the example strings in sorted order.
vocab = ['<pad>'] + sorted(set(''.join(seqs)))
# make model |
''' Script for downloading all GLUE data. | |
Note: for legal reasons, we are unable to host MRPC. | |
You can either use the version hosted by the SentEval team, which is already tokenized, | |
or you can download the original data from (https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi) and extract the data from it manually. | |
For Windows users, you can run the .msi file. For Mac and Linux users, consider an external library such as 'cabextract' (see below for an example). | |
You should then rename and place specific files in a folder (see below for an example). | |
mkdir MRPC | |
cabextract MSRParaphraseCorpus.msi -d MRPC |
# coding: utf-8 | |
import logging | |
import re | |
from collections import Counter | |
import numpy as np | |
import torch | |
from sklearn.datasets import fetch_20newsgroups | |
from torch.autograd import Variable |
"""Information Retrieval metrics | |
Useful Resources: | |
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt | |
http://www.nii.ac.jp/TechReports/05-014E.pdf | |
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf | |
Learning to Rank for Information Retrieval (Tie-Yan Liu) | |
""" | |
import numpy as np |
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """ | |
import numpy as np | |
import cPickle as pickle | |
import gym | |
# hyperparameters
H = 200  # number of hidden layer neurons
batch_size = 10  # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99  # discount factor for reward
使用 Python 内置的 defaultdict,我们可以很容易地定义一个树形数据结构:
def tree():
    """Return a defaultdict whose missing keys are filled by calling tree()."""
    return defaultdict(tree)
就是这样!