Prince Grover (groverpr)
import mxnet as mx
from mxnet import gluon

def load_base_model(model_path, epoch, ctx, layer_name=None, n_inputs=2):
    """ Loads the model from the given model path
    and returns a subnetwork that gives output from layer_name
    """
    net = gluon.nn.SymbolBlock.imports(
        model_path + "-symbol.json",
        ['data%i' % i for i in range(n_inputs)],
        model_path + "-%.4d.params" % epoch,
        ctx=ctx,
    )
    if layer_name is not None:  # slice the graph at layer_name (standard SymbolBlock pattern)
        inputs = [mx.sym.var('data%i' % i) for i in range(n_inputs)]
        out = net(*inputs).get_internals()[layer_name + "_output"]
        net = gluon.SymbolBlock(out, inputs, params=net.collect_params())
    return net
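A hypothetical call (the "imdb_gru" prefix, the epoch number, and the "gru0" layer name below are placeholders, not names from the tutorial):

ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()
encoder = load_base_model("imdb_gru", epoch=10, ctx=ctx, layer_name="gru0")  # subnetwork up to the GRU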
# Frequently occurring positive (first rows) and negative tokens from the reviews
few_words = ['great', 'excellent', 'best', 'perfect', 'wonderful', 'well',
             'fun', 'love', 'amazing', 'also', 'enjoyed', 'favorite', 'it',
             'and', 'loved', 'highly', 'bit', 'job', 'today', 'beautiful',
             'you', 'definitely', 'superb', 'brilliant', 'world', 'liked',
             'still', 'enjoy', 'life', 'very', 'especially', 'see', 'fantastic',
             'both', 'shows', 'good', 'may', 'terrific', 'heart', 'classic',
             'will', 'enjoyable', 'beautifully', 'always', 'true', 'perfectly',
             'surprised', 'think', 'outstanding', 'most',
             'bad', 'worst', 'awful', 'waste', 'boring', 'poor', 'terrible']
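One way to probe these words is to look up their rows in the trained embedding matrix. A minimal sketch, assuming the fitted Tok2Idx step is available as tt and the embedding parameter is named "token_embed_weight" (both are assumptions):

import numpy as np
embed_w = encoder.collect_params()["token_embed_weight"].data().asnumpy()  # (vocab_size, tok_embed_dim)
word_vecs = np.stack([embed_w[tt.tok2idx[w]] for w in few_words if w in tt.tok2idx])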
def train(network,
          train_data,
          holdout_data,
          loss,
          epochs,
          ctx,
          lr=1e-2,
          wd=1e-5,
          optimizer='adam'):
    """ Trains network on train_data for the given number of epochs,
    tracking performance on holdout_data.
    """
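A minimal sketch of the standard Gluon loop these arguments imply (a sketch, not the tutorial's actual body; batches are assumed to unpack as (data, valid_length, label), and holdout evaluation is omitted):

import mxnet as mx
from mxnet import autograd, gluon

def train_sketch(network, train_data, holdout_data, loss, epochs, ctx,
                 lr=1e-2, wd=1e-5, optimizer='adam'):
    trainer = gluon.Trainer(network.collect_params(), optimizer,
                            {"learning_rate": lr, "wd": wd})
    for epoch in range(epochs):
        cum_loss, n = 0.0, 0
        for data, valid_len, label in train_data:
            data, valid_len, label = [x.as_in_context(ctx[0])
                                      for x in (data, valid_len, label)]
            with autograd.record():
                l = loss(network(data, valid_len), label)  # per-sample losses
            l.backward()
            trainer.step(label.shape[0])
            cum_loss += l.sum().asscalar()
            n += label.shape[0]
        print("epoch %d, mean train loss %.4f" % (epoch, cum_loss / n))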
# Preparation of network arguments
ctx = [mx.gpu(0)] # use a GPU
tt = transformer_pipe.named_steps['token2index'] # to get token to integer map
max_idx = max(tt.tok2idx.values())+1 # size of vocabulary of all tokens in training data
tok_embed_dim = 64 # embedding size of each token
review_embed_dim = 50 # embedding size of hidden state in GRU
input_output_embed_map = {"token_embed": (max_idx, tok_embed_dim),
                          "hidden_embed": (None, review_embed_dim)}
class CustomSeqNet(gluon.nn.HybridBlock):
    """
    Custom defined network for sequence data that is used to predict a binary label.
    """
    def __init__(self, input_output_embed_map, dense_sizes=[100], dropouts=[0.2], activation="relu"):
        """
        input_output_embed_map: {"token_embed": (max_tok_idx, tok_embed_dim), "hidden_embed": (None, hidden_embed_dim)}
        """
        super().__init__()
        self.dense_sizes = dense_sizes  # list of output dimensions of the dense layers
        self.dropouts = dropouts        # dropout rate applied after each dense layer
        self.activation = activation    # activation used in the dense layers
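The preview truncates the class here. A self-contained sketch of how such a network is typically assembled in Gluon (embed tokens, encode with a GRU, classify through a dense head); every layer name and the forward signature below are assumptions, not the tutorial's actual definition:

import mxnet as mx
from mxnet import gluon

class SeqNetSketch(gluon.nn.HybridBlock):
    """ Hypothetical completion: embed tokens, encode with a GRU, classify. """
    def __init__(self, vocab_size, tok_embed_dim=64, hidden_dim=50,
                 dense_sizes=(100,), dropouts=(0.2,), activation="relu", **kwargs):
        super().__init__(**kwargs)
        with self.name_scope():
            self.token_embed = gluon.nn.Embedding(vocab_size, tok_embed_dim)
            self.encoder = gluon.rnn.GRU(hidden_dim)  # expects (seq, batch, feat) by default
            self.head = gluon.nn.HybridSequential()
            for size, drop in zip(dense_sizes, dropouts):
                self.head.add(gluon.nn.Dense(size, activation=activation))
                self.head.add(gluon.nn.Dropout(drop))
            self.head.add(gluon.nn.Dense(1))  # single logit for the binary label

    def hybrid_forward(self, F, tokens, valid_length):
        emb = self.token_embed(tokens).transpose((1, 0, 2))  # (batch, seq, dim) -> (seq, batch, dim)
        enc = self.encoder(emb)
        last = F.SequenceLast(enc, sequence_length=valid_length.astype('float32'),
                              use_sequence_length=True)  # hidden state at each sequence's true end
        return self.head(last)

net = SeqNetSketch(max_idx, tok_embed_dim, review_embed_dim)
net.initialize(ctx=ctx)
net.hybridize()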
import gluonnlp as nlp

def get_dataloader(dataset,
                   dataset_type="train",  # or "valid"/"test"
                   batch_size=256,
                   bucket_num=5,
                   shuffle=True,  # True for training
                   num_workers=1):
    # Batchify function appends the length of each sequence to feed as additional input
    combined_batchify_fn = nlp.data.batchify.Tuple(
        nlp.data.batchify.Pad(axis=0, ret_length=True),  # padded token ids + valid lengths
        nlp.data.batchify.Stack(),  # labels (assumed completion of the truncated call)
    )
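    # Possible continuation (assumed, since the preview stops above): bucket by
    # sequence length so each batch pads to a similar size, then build the loader.
    lengths = [len(seq) for seq, _ in dataset]  # dataset items assumed to be (token_ids, label)
    sampler = nlp.data.sampler.FixedBucketSampler(
        lengths, batch_size=batch_size, num_buckets=bucket_num, shuffle=shuffle)
    return gluon.data.DataLoader(dataset, batch_sampler=sampler,
                                 batchify_fn=combined_batchify_fn,
                                 num_workers=num_workers)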
groverpr / transfer_learn_mxnet_notebook.ipynb
Created February 25, 2020 02:02
Full notebook for transfer learning tutorial using MXNet
import re
import string
from sklearn.base import BaseEstimator, TransformerMixin

class Tokenize(BaseEstimator, TransformerMixin):
    """
    Takes in a pandas Series and applies tokenization to each row based on the given split pattern.
    """
    def __init__(self, split_pat=f"([{string.punctuation}])"):
        self.split_pat = split_pat  # regex used to split a string into tokens; by default splits on any punctuation
    def fit(self, X, y=None):
        return self  # stateless transformer
    def tokenize(self, s):
        """ Tokenize a single string """
        re_tok = re.compile(self.split_pat)
        return re_tok.sub(r" \1 ", s).split()  # assumed completion: pad punctuation with spaces, split on whitespace
    def transform(self, X):
        return X.apply(self.tokenize)
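For example, punctuation becomes its own token (lowercasing happens in a separate pipeline step, so case is preserved here):

Tokenize().tokenize("Loved it, 10/10!")
# -> ['Loved', 'it', ',', '10', '/', '10', '!']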
from sklearn.pipeline import Pipeline

# Transformers in a Pipeline run in series: null_impute -> lower_case -> tokenize -> token2index
# (NullImputer, LowerCaser and Tok2Idx are custom transformers defined elsewhere in the notebook)
transformer_pipe = Pipeline(steps=[
    ("null_impute", NullImputer(strategy="constant", fill_value="null")),
    ("lower_case", LowerCaser()),
    ("tokenize", Tokenize(f"([{string.punctuation}])")),
    ("token2index", Tok2Idx())
])
X_train_transformed = transformer_pipe.fit_transform(X_train)  # fit and transform on train data
X_valid_transformed = transformer_pipe.transform(X_valid)      # transform only; reuses the train vocabulary
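End to end, each raw review becomes a list of integer token ids (the ids themselves depend on the vocabulary fitted on X_train):

import pandas as pd
sample = pd.Series(["Best movie ever!", None])
transformer_pipe.transform(sample)  # nulls imputed, lowercased, tokenized, then mapped to ids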