Skip to content

Instantly share code, notes, and snippets.

@karino2
Last active July 4, 2019 01:27
Show Gist options
  • Save karino2/b889ef4d70226b829d86ac26390bb40e to your computer and use it in GitHub Desktop.
Save karino2/b889ef4d70226b829d86ac26390bb40e to your computer and use it in GitHub Desktop.
Tegashiki model
# Rate of the SpatialDropout1D layers in the decoder block.
DROPOUT_RATE = 0.5
# L2 strength for the kernel/bias/activity regularizers used outside the
# feature extractor (encoder convs, attention, decoder, output layers).
L2_REGULARIZATION_RATE = 0.1

# Kernel width of the first two feature-extractor convolutions.
FEATURE_EXTRACTER_KERNEL_SIZE = 7

# Filter count and kernel width of the causal decoder convolution.
FILTER_NUM = 128
KERNEL_SIZE = 5

# model_small
EMBEDDING_SIZE = 32
OT_HIDDEN = 128
GRU_HIDDEN = 128  # not referenced by any code visible in this file
ATTENTION_ENC_HIDDEN = 64
ATTENTION_DEC_HIDDEN = 64
def feature_extractor(input_stroke_t, is_training_arg):
    """Reduce every stroke to one fixed-size feature vector.

    Each stroke is pushed through three Conv1D -> BatchNormalization ->
    ReLU -> Dropout stages (the first two are also max-pooled by 2), and a
    global max over the remaining time axis yields one vector per stroke.

    Args:
        input_stroke_t: tensor shaped
            (batch, MAX_STROKE_NUM, MAX_ONE_STROKE_LEN, INPUT_TYPE_DIM).
        is_training_arg: truthy when building the training graph.

    Returns:
        Tensor shaped (batch, MAX_STROKE_NUM, EXTRACTED_FEATURE_DIM).
    """
    # Training is passed to the layers as ``training=None`` (not True) and
    # inference as ``training=False``.
    # NOTE(review): None presumably defers to the Keras learning phase --
    # confirm this is the intended behaviour.
    training_flag = None if is_training_arg else False

    def conv_bn_relu(tensor, filters, kernel_size):
        """Conv1D + batch norm + ReLU with the feature-extractor L2 terms."""
        conv = Conv1D(
            filters,
            kernel_size,
            kernel_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
            bias_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
            activity_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
        )
        tensor = conv(tensor)
        tensor = BatchNormalization()(tensor, training=training_flag)
        return Activation('relu')(tensor)

    with tf.variable_scope("feature_extractor"):
        in_shape = input_stroke_t.shape
        # Fold the stroke axis into the batch axis so each stroke is
        # convolved on its own:
        # (batch*MAX_STROKE_NUM, MAX_ONE_STROKE_LEN, INPUT_TYPE_DIM)
        stage = tf.reshape(input_stroke_t, [-1, in_shape[2], in_shape[3]])

        # Stage 1: 32 filters, then halve the time axis.
        # Lengths in the original annotations (~MAX_ONE_STROKE_LEN/2 after
        # this stage) may be approximate: no padding= is given to Conv1D.
        stage = conv_bn_relu(stage, 32, FEATURE_EXTRACTER_KERNEL_SIZE)
        stage = MaxPooling1D(pool_size=2)(stage)
        stage = Dropout(FE_DROPOUT_RATE)(stage, training=training_flag)

        # Stage 2: 64 filters, halve the time axis again (~LEN/4).
        stage = conv_bn_relu(stage, 64, FEATURE_EXTRACTER_KERNEL_SIZE)
        stage = MaxPooling1D(pool_size=2)(stage)
        stage = Dropout(FE_DROPOUT_RATE)(stage, training=training_flag)

        # Stage 3: project to EXTRACTED_FEATURE_DIM channels (kernel 7),
        # no pooling before the global max.
        stage = conv_bn_relu(stage, EXTRACTED_FEATURE_DIM, 7)
        stage = Dropout(FE_DROPOUT_RATE)(stage, training=training_flag)

        # Collapse the time axis, then restore the (batch, stroke) layout.
        pooled = GlobalMaxPooling1D()(stage)
        return tf.reshape(pooled, [-1, in_shape[1], EXTRACTED_FEATURE_DIM])
# Dynamic shapes cause TPUEstimator export to fail...
def myembedding(input, num_classes, embedding_size, seq_num, name):
    """Embed integer ids by multiplying their one-hot encoding with a matrix.

    The matrix is a variable called ``name`` inside a variable scope also
    called ``name``; the scope uses AUTO_REUSE, so repeated calls with the
    same name share it.

    Args:
        input: integer id tensor; it is one-hot encoded and flattened.
        num_classes: number of distinct ids (rows of the embedding matrix).
        embedding_size: width of each embedding vector.
        seq_num: static sequence length used for the output reshape.
        name: variable-scope and variable name of the embedding matrix.

    Returns:
        Tensor reshaped to (-1, seq_num, embedding_size).
    """
    def initial_table():
        # Zero-argument initializer: uniform values in [-0.05, 0.05).
        return tf.random_uniform([num_classes, embedding_size], -0.05, 0.05)

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        table = tf.get_variable(name, initializer=initial_table)
        one_hot_ids = tf.one_hot(input, num_classes)
        one_hot_rows = tf.reshape(one_hot_ids, [-1, num_classes])
        looked_up = tf.matmul(one_hot_rows, table)
        return tf.reshape(looked_up, [-1, seq_num, embedding_size])
def embed_stroke(stroke_features):
    """Add a learned position embedding to the per-stroke features.

    Args:
        stroke_features: tensor whose axis 1 indexes strokes; the last axis
            must be broadcast-compatible with EXTRACTED_FEATURE_DIM.

    Returns:
        ``stroke_features`` plus the position embedding, in the dtype of
        ``stroke_features``.
    """
    stroke_count = tf.shape(stroke_features)[1]
    # Positions 0..stroke_count-1 with a leading axis of size 1, so the
    # embedding broadcasts over the batch when added below.
    positions = tf.expand_dims(
        tf.range(0, stroke_count, delta=1, dtype=tf.int32, name='range'),
        axis=0)
    position_embedding = myembedding(
        positions,
        MAX_STROKE_NUM,
        EXTRACTED_FEATURE_DIM,
        MAX_STROKE_NUM,
        "stroke_pos_embed")
    return stroke_features + tf.cast(
        x=position_embedding, dtype=stroke_features.dtype)
def encConv1D(filternum, kernelsize, input):
    """Apply a 'same'-padded ReLU Conv1D with L2 regularization to ``input``.

    Args:
        filternum: number of output filters.
        kernelsize: convolution kernel width.
        input: tensor to convolve.

    Returns:
        The output tensor of the convolution.
    """
    layer = Conv1D(
        filternum,
        kernelsize,
        activation='relu',
        padding='same',
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )
    return layer(input)
def encSelfAttenBlock(input):
    """One encoder block: self-attention, then a two-layer pointwise conv.

    Both sub-layers are wrapped in a residual connection followed by layer
    normalization.

    Args:
        input: tensor with MAX_STROKE_NUM steps on axis 1; its last axis
            must be 512 wide for the second residual addition to match.

    Returns:
        The layer-normalized block output.
    """
    # Self-attention: the same tensor is both the attended sequence and
    # the query sequence.
    attended = attention_context(input, input, MAX_STROKE_NUM)
    attention_out = tf.contrib.layers.layer_norm(input + attended)

    # Pointwise (kernel size 1) expansion to 2048 and projection to 512.
    expanded = encConv1D(2048, 1, attention_out)
    projected = encConv1D(512, 1, expanded)
    return tf.contrib.layers.layer_norm(attention_out + projected)
def encoder_SelfAttention(input):
    """Encode ``input`` with a 512-filter pointwise conv and six attention blocks.

    Args:
        input: tensor to encode.

    Returns:
        Output of the last ``encSelfAttenBlock``.
    """
    num_blocks = 6
    hidden = encConv1D(512, 1, input)
    for _ in range(num_blocks):
        hidden = encSelfAttenBlock(hidden)
    return hidden
def embed_decoder(decoder_input_t):
    """Embed decoder token ids and add a learned position embedding.

    Args:
        decoder_input_t: integer token-id tensor whose axis 1 indexes token
            positions.

    Returns:
        Token embedding plus position embedding, in the dtype of the token
        embedding.
    """
    token_embedding = myembedding(
        decoder_input_t, VOCAB_SIZE, EMBEDDING_SIZE, MAX_TOKEN_LEN,
        "dec_embed")

    token_count = tf.shape(decoder_input_t)[1]
    # Positions 0..token_count-1 with a leading axis of size 1, so the
    # embedding broadcasts over the batch when added below.
    positions = tf.expand_dims(
        tf.range(0, token_count, delta=1, dtype=tf.int32, name='range'),
        axis=0)
    position_embedding = myembedding(
        positions, MAX_TOKEN_LEN, EMBEDDING_SIZE, MAX_TOKEN_LEN,
        "dec_pos_embed")

    return token_embedding + tf.cast(
        x=position_embedding, dtype=token_embedding.dtype)
def attention_context(ht_enc, ht_dec, maxtklen):
    """Additive attention: one context vector per query position.

    Fix: the scoring layer used to be ``Dense(1, activation="softmax")``,
    which applies softmax over the last axis. That axis has size 1, so
    every weight was exactly 1.0 and the context was just the unweighted
    sum of ``ht_enc``, identical for every query position. The softmax is
    now taken over the encoder axis (axis 1), so the weights for each
    query position form a distribution over encoder steps.

    Args:
        ht_enc: attended states; assumed (batch, enc_len, enc_dim).
        ht_dec: query states; assumed (batch, maxtklen, dec_dim).
        maxtklen: static length of axis 1 of ``ht_dec``; used as the repeat
            count so no dynamic shape is needed.

    Returns:
        Context tensor shaped (batch, maxtklen, enc_dim).
    """
    def softmax_over_encoder(logits):
        # logits: (batch, enc_len, maxtklen, 1); normalise over enc_len.
        return tf.nn.softmax(logits, axis=1)

    w1 = Dense(
        ATTENTION_ENC_HIDDEN,
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )(ht_enc)
    w2 = Dense(
        ATTENTION_DEC_HIDDEN,
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )(ht_dec)

    # Lay both projections out on a (batch, enc_len, maxtklen, hidden) grid.
    # The addition below needs ATTENTION_ENC_HIDDEN == ATTENTION_DEC_HIDDEN.
    w2_widen = tf.expand_dims(w2, axis=1)
    w1_widen = tf.expand_dims(w1, axis=2)
    w1_widen_repeat = K.repeat_elements(w1_widen, rep=maxtklen, axis=2)
    score = tf.nn.tanh(w1_widen_repeat + w2_widen)

    # Softmax is passed as the Dense activation (rather than applied
    # afterwards) so the activity regularizer still sees the attention
    # probabilities, as in the original layer.
    prob = Dense(
        1,
        activation=softmax_over_encoder,
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )(score)

    # Weighted sum of encoder states over the encoder axis.
    ht_enc_repeated = K.repeat_elements(
        tf.expand_dims(ht_enc, axis=2), rep=maxtklen, axis=2)
    context_vec = tf.reduce_sum(prob * ht_enc_repeated, axis=1)
    return context_vec
def decoder_CnnWithAttentionBlock(dec_input, ht_enc, is_training):
    """Decoder block: causal conv, attention over the encoder, pointwise conv.

    Args:
        dec_input: embedded decoder input sequence.
        ht_enc: encoder states to attend over.
        is_training: forwarded as ``training=`` to both SpatialDropout1D
            layers.

    Returns:
        Output of the 1024-filter pointwise convolution after dropout.
    """
    causal_conv = Conv1D(
        FILTER_NUM,
        KERNEL_SIZE,
        activation='relu',
        padding='causal',
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )
    conv_out = causal_conv(dec_input)
    # No layer_norm here on purpose: the original author notes that
    # tf.contrib.layers.layer_norm at this point leaks future information.
    ht_dec = SpatialDropout1D(DROPOUT_RATE)(conv_out, training=is_training)

    # Attach the attention context for each decoder position.
    context_vec = attention_context(ht_enc, ht_dec, MAX_TOKEN_LEN)
    merged = Concatenate()([ht_dec, context_vec])

    pointwise_conv = Conv1D(
        1024,
        1,
        activation='relu',
        padding='causal',
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )
    pointwise_out = pointwise_conv(merged)
    return SpatialDropout1D(DROPOUT_RATE)(pointwise_out, training=is_training)
# sqrt(0.5) scaling constant. Not referenced by any code visible in this file.
# NOTE(review): presumably meant for scaling residual sums -- confirm or remove.
SCALE = math.sqrt(0.5)
def create_model(input_stroke_t, decoder_input_t, is_training):
    """Build the stroke-to-token model and return per-position logits.

    Strokes go through feature extraction, position embedding and the
    encoder; the embedded decoder input then attends over the encoder
    output, and two dense layers turn the result into vocabulary logits.

    Args:
        input_stroke_t: stroke tensor passed to ``feature_extractor``.
        decoder_input_t: decoder token-id tensor passed to ``embed_decoder``.
        is_training: training flag forwarded to the sub-networks.

    Returns:
        Logits tensor whose last axis has size VOCAB_SIZE.
    """
    # Encoder side.
    stroke_embedded = embed_stroke(
        feature_extractor(input_stroke_t, is_training))
    # Decoder side.
    dec_embedded = embed_decoder(decoder_input_t)

    # NOTE(review): encoder_CNN is not defined in the visible part of this
    # file -- confirm it exists elsewhere.
    ht_enc = encoder_CNN(stroke_embedded, is_training)
    dec_ht = decoder_CnnWithAttentionBlock(dec_embedded, ht_enc, is_training)

    hidden_layer = Dense(
        OT_HIDDEN,
        activation="tanh",
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )
    ot = hidden_layer(dec_ht)

    vocab_projection = Dense(
        VOCAB_SIZE,
        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
    )
    return TimeDistributed(vocab_projection)(ot)
@karino2
Copy link
Author

karino2 commented Jun 29, 2019

convencdec_rescon_mulsqrt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment