karino2 · July 4, 2019 01:27 · karino2 · Jun 29, 2019
diff --git a/tegashiki_model.py b/tegashiki_model.py
 # Dropout and L2 rates used by the encoder, attention and decoder layers.
 DROPOUT_RATE = 0.5
 L2_REGULARIZATION_RATE = 0.1

 # Kernel width of the first two feature-extractor convolutions.
 FEATURE_EXTRACTER_KERNEL_SIZE = 7

 # Filter count and kernel width of the decoder's causal convolution.
 FILTER_NUM = 128
 KERNEL_SIZE = 5


 # model_small
 EMBEDDING_SIZE = 32
 OT_HIDDEN = 128
 GRU_HIDDEN = 128
 ATTENTION_ENC_HIDDEN = 64
 ATTENTION_DEC_HIDDEN = 64

 def feature_extractor(input_stroke_t, is_training_arg):
   """Collapse each stroke into a single feature vector with a 1-D conv stack.

   input_stroke_t: (batch, MAX_STROKE_NUM, MAX_ONE_STROKE_LEN, INPUT_TYPE_DIM)
   is_training_arg: truthy while training, falsy otherwise.
   output: (batch, MAX_STROKE_NUM, EXTRACTED_FEATURE_DIM)
   """
   # A truthy flag reaches the layers as training=None, a falsy one as
   # training=False; True itself is never passed.
   # NOTE(review): presumably None defers to the Keras learning phase -- confirm.
   training = None if is_training_arg else False

   def l2_conv(filters, kernel_size):
     # New Conv1D with the same L2 rate on kernel, bias and output activity.
     return Conv1D(
         filters,
         kernel_size,
         kernel_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
         bias_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
         activity_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE))

   with tf.variable_scope("feature_extractor"):
     stroke_shape = input_stroke_t.shape
     # Merge the batch and stroke axes so every stroke is convolved on its own:
     # (batch*MAX_STROKE_NUM, MAX_ONE_STROKE_LEN, INPUT_TYPE_DIM)
     feat = tf.reshape(input_stroke_t, [-1, stroke_shape[2], stroke_shape[3]])

     # Stage 1 (32 filters): conv, batch norm, relu, pool by 2, dropout.
     feat = l2_conv(32, FEATURE_EXTRACTER_KERNEL_SIZE)(feat)
     feat = BatchNormalization()(feat, training=training)
     feat = Activation('relu')(feat)
     feat = MaxPooling1D(pool_size=2)(feat)
     feat = Dropout(FE_DROPOUT_RATE)(feat, training=training)

     # Stage 2 (64 filters): same layout as stage 1.
     feat = l2_conv(64, FEATURE_EXTRACTER_KERNEL_SIZE)(feat)
     feat = BatchNormalization()(feat, training=training)
     feat = Activation('relu')(feat)
     feat = MaxPooling1D(pool_size=2)(feat)
     feat = Dropout(FE_DROPOUT_RATE)(feat, training=training)

     # Stage 3 (EXTRACTED_FEATURE_DIM filters): the kernel size is the literal 7
     # and there is no pooling layer before the dropout.
     feat = l2_conv(EXTRACTED_FEATURE_DIM, 7)(feat)
     feat = BatchNormalization()(feat, training=training)
     feat = Activation('relu')(feat)
     feat = Dropout(FE_DROPOUT_RATE)(feat, training=training)

     # Max over the remaining time axis, then restore the stroke axis.
     pooled = GlobalMaxPooling1D()(feat)
     return tf.reshape(pooled, [-1, stroke_shape[1], EXTRACTED_FEATURE_DIM])

 # Dynamic shapes cause the TPUEstimator export to fail, so (presumably for
 # that reason) the lookup is a one-hot matmul followed by an explicit reshape.
 def myembedding(input, num_classes, embedding_size, seq_num, name):
   """Embed integer ids through a one-hot matrix product.

   input: integer id tensor; every id is one-hot encoded to depth num_classes.
   num_classes: number of rows of the embedding matrix.
   embedding_size: number of columns of the embedding matrix.
   seq_num: size of the middle axis of the returned tensor.
   name: used for both the variable scope (AUTO_REUSE) and the variable itself.
   Returns the embedded ids reshaped to (-1, seq_num, embedding_size).
   """
   with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
     def uniform_init():
       # Zero-argument initializer: uniform values in [-0.05, 0.05).
       return tf.random_uniform([num_classes, embedding_size], -0.05, 0.05)

     table = tf.get_variable(name, initializer=uniform_init)
     one_hot_rows = tf.reshape(tf.one_hot(input, num_classes), [-1, num_classes])
     looked_up = tf.matmul(one_hot_rows, table)
     return tf.reshape(looked_up, [-1, seq_num, embedding_size])

 def embed_stroke(stroke_features):
   """Add a learned position embedding (one per stroke index) to the features.

   stroke_features: tensor whose axis 1 enumerates strokes.
   Returns stroke_features plus the position embedding cast to its dtype.
   """
   stroke_count = tf.shape(stroke_features)[1]
   positions = tf.range(0, stroke_count, delta=1, dtype=tf.int32, name='range')
   # Leading axis of size 1 so the embedding broadcasts over the batch.
   positions = tf.expand_dims(positions, axis=0)
   position_embed = myembedding(
       positions, MAX_STROKE_NUM, EXTRACTED_FEATURE_DIM, MAX_STROKE_NUM,
       "stroke_pos_embed")

   return stroke_features + tf.cast(x=position_embed, dtype=stroke_features.dtype)

 def encConv1D(filternum, kernelsize, input):
   """Run input through a new 'same'-padded, ReLU, L2-regularized Conv1D."""
   conv = Conv1D(
       filternum,
       kernelsize,
       activation='relu',
       padding='same',
       kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
       bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
       activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))
   return conv(input)

 def encSelfAttenBlock(input):
   """One encoder block: self-attention, then two pointwise convolutions.

   Each half is followed by a residual addition and layer normalization.
   """
   # input is passed as both arguments, i.e. the sequence attends to itself.
   attended = attention_context(input, input, MAX_STROKE_NUM)
   normed = tf.contrib.layers.layer_norm(input + attended)

   # Kernel-size-1 convolutions: widen to 2048 channels, project back to 512.
   widened = encConv1D(2048, 1, normed)
   projected = encConv1D(512, 1, widened)
   return tf.contrib.layers.layer_norm(normed + projected)

 def encoder_SelfAttention(input):
   """Project input to 512 channels, then apply six self-attention blocks."""
   hidden = encConv1D(512, 1, input)
   for _ in range(6):
     hidden = encSelfAttenBlock(hidden)
   return hidden

 def embed_decoder(decoder_input_t):
   """Embed decoder token ids and add a learned position embedding.

   decoder_input_t: integer token ids; axis 1 enumerates token positions.
   Returns the token embedding plus the position embedding cast to its dtype.
   """
   token_embed = myembedding(
       decoder_input_t, VOCAB_SIZE, EMBEDDING_SIZE, MAX_TOKEN_LEN, "dec_embed")

   token_count = tf.shape(decoder_input_t)[1]
   positions = tf.range(0, token_count, delta=1, dtype=tf.int32, name='range')
   # Leading axis of size 1 so the embedding broadcasts over the batch.
   positions = tf.expand_dims(positions, axis=0)
   position_embed = myembedding(
       positions, MAX_TOKEN_LEN, EMBEDDING_SIZE, MAX_TOKEN_LEN, "dec_pos_embed")

   return token_embed + tf.cast(x=position_embed, dtype=token_embed.dtype)

 def attention_context(ht_enc, ht_dec, maxtklen):
   """Additive (tanh) attention of every ht_dec position over ht_enc.

   ht_enc: rank-3 tensor (batch, enc_len, enc_dim) that is attended over.
   ht_dec: rank-3 tensor (batch, maxtklen, dec_dim) of query positions.
   maxtklen: static length of axis 1 of ht_dec, used as the repeat count.
   Returns (batch, maxtklen, enc_dim): for each query position, the
   attention-weighted sum of ht_enc over its axis 1.
   """
   def l2_terms():
     # Fresh L2 regularizers for kernel, bias and layer output.
     return dict(
         kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
         bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
         activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))

   # Both projections are summed below, so the two hidden sizes must match.
   w1 = Dense(ATTENTION_ENC_HIDDEN, **l2_terms())(ht_enc)
   w2 = Dense(ATTENTION_DEC_HIDDEN, **l2_terms())(ht_dec)

   w2_widen = tf.expand_dims(w2, axis=1)  # (batch, 1, maxtklen, hidden)
   w1_widen = tf.expand_dims(w1, axis=2)  # (batch, enc_len, 1, hidden)

   w1_widen_repeat = K.repeat_elements(w1_widen, rep=maxtklen, axis=2)

   # (batch, enc_len, maxtklen, hidden)
   score = tf.nn.tanh(w1_widen_repeat + w2_widen)

   # One unnormalized energy per (encoder position, query position) pair.
   # The activity regularizer therefore acts on these pre-softmax energies.
   energy = Dense(1, **l2_terms())(score)
   # Normalize across the encoder axis. Using activation="softmax" on the
   # Dense(1) layer normalizes over its size-1 last axis, which yields 1.0
   # everywhere and reduces the context to an unweighted sum of ht_enc.
   prob = tf.nn.softmax(energy, axis=1)

   ht_enc_repeated = K.repeat_elements(tf.expand_dims(ht_enc, axis=2), rep=maxtklen, axis=2)
   context_vec = tf.reduce_sum(prob * ht_enc_repeated, axis=1)

   return context_vec

 def decoder_CnnWithAttentionBlock(dec_input, ht_enc, is_training):
   """Causal conv over dec_input, attention over ht_enc, then a pointwise conv.

   dec_input: tensor fed to the causal Conv1D.
   ht_enc: encoder states handed to attention_context.
   is_training: forwarded as training= to both SpatialDropout1D layers.
   Returns the dropout output of the 1024-filter kernel-size-1 convolution.
   """
   def l2_terms():
     # Fresh L2 regularizers for kernel, bias and layer output.
     return dict(
         kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
         bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
         activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))

   causal_conv = Conv1D(
       FILTER_NUM, KERNEL_SIZE, activation='relu', padding='causal', **l2_terms())
   conved = causal_conv(dec_input)
   # No layer norm here: the original author notes that it would leak
   # future information into earlier positions.
   ht_dec = SpatialDropout1D(DROPOUT_RATE)(conved, training=is_training)

   context_vec = attention_context(ht_enc, ht_dec, MAX_TOKEN_LEN)

   # Join each decoder state with its context along the channel axis.
   merged = Concatenate()([ht_dec, context_vec])
   pointwise = Conv1D(
       1024, 1, activation='relu', padding='causal', **l2_terms())(merged)

   return SpatialDropout1D(DROPOUT_RATE)(pointwise, training=is_training)

 # Square root of one half.
 # NOTE(review): no use of SCALE is visible in this part of the file -- confirm
 # it is still needed.
 SCALE = math.sqrt(0.5)

 def create_model(input_stroke_t, decoder_input_t, is_training):
   """Wire feature extractor, encoder and attention decoder into token logits.

   input_stroke_t: stroke tensor handed to feature_extractor.
   decoder_input_t: decoder token ids handed to embed_decoder.
   is_training: forwarded unchanged to the sub-networks.
   Returns the output of a TimeDistributed Dense layer with VOCAB_SIZE units.
   """
   def l2_terms():
     # Fresh L2 regularizers for kernel, bias and layer output.
     return dict(
         kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
         bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
         activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))

   stroke_embedded = embed_stroke(feature_extractor(input_stroke_t, is_training))
   dec_embedded = embed_decoder(decoder_input_t)

   # NOTE(review): encoder_CNN is not defined in the visible part of this file
   # (only encoder_SelfAttention is) -- confirm it exists elsewhere.
   ht_enc = encoder_CNN(stroke_embedded, is_training)

   dec_ht = decoder_CnnWithAttentionBlock(dec_embedded, ht_enc, is_training)

   hidden = Dense(OT_HIDDEN, activation="tanh", **l2_terms())(dec_ht)
   vocab_projection = Dense(VOCAB_SIZE, **l2_terms())
   return TimeDistributed(vocab_projection)(hidden)
	# Dropout and L2 rates used by the encoder, attention and decoder layers.
	DROPOUT_RATE = 0.5
	L2_REGULARIZATION_RATE = 0.1

	# Kernel width of the first two feature-extractor convolutions.
	FEATURE_EXTRACTER_KERNEL_SIZE = 7

	# Filter count and kernel width of the decoder's causal convolution.
	FILTER_NUM = 128
	KERNEL_SIZE = 5


	# model_small
	EMBEDDING_SIZE = 32
	OT_HIDDEN = 128
	GRU_HIDDEN = 128
	ATTENTION_ENC_HIDDEN = 64
	ATTENTION_DEC_HIDDEN = 64

	def feature_extractor(input_stroke_t, is_training_arg):
	  """Collapse each stroke into a single feature vector with a 1-D conv stack.

	  input_stroke_t: (batch, MAX_STROKE_NUM, MAX_ONE_STROKE_LEN, INPUT_TYPE_DIM)
	  is_training_arg: truthy while training, falsy otherwise.
	  output: (batch, MAX_STROKE_NUM, EXTRACTED_FEATURE_DIM)
	  """
	  # A truthy flag reaches the layers as training=None, a falsy one as
	  # training=False; True itself is never passed.
	  # NOTE(review): presumably None defers to the Keras learning phase -- confirm.
	  training = None if is_training_arg else False

	  def l2_conv(filters, kernel_size):
	    # New Conv1D with the same L2 rate on kernel, bias and output activity.
	    return Conv1D(
	        filters,
	        kernel_size,
	        kernel_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
	        bias_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE),
	        activity_regularizer=regularizers.l2(FE_L2_REGULARIZATION_RATE))

	  with tf.variable_scope("feature_extractor"):
	    stroke_shape = input_stroke_t.shape
	    # Merge the batch and stroke axes so every stroke is convolved on its own:
	    # (batch*MAX_STROKE_NUM, MAX_ONE_STROKE_LEN, INPUT_TYPE_DIM)
	    feat = tf.reshape(input_stroke_t, [-1, stroke_shape[2], stroke_shape[3]])

	    # Stage 1 (32 filters): conv, batch norm, relu, pool by 2, dropout.
	    feat = l2_conv(32, FEATURE_EXTRACTER_KERNEL_SIZE)(feat)
	    feat = BatchNormalization()(feat, training=training)
	    feat = Activation('relu')(feat)
	    feat = MaxPooling1D(pool_size=2)(feat)
	    feat = Dropout(FE_DROPOUT_RATE)(feat, training=training)

	    # Stage 2 (64 filters): same layout as stage 1.
	    feat = l2_conv(64, FEATURE_EXTRACTER_KERNEL_SIZE)(feat)
	    feat = BatchNormalization()(feat, training=training)
	    feat = Activation('relu')(feat)
	    feat = MaxPooling1D(pool_size=2)(feat)
	    feat = Dropout(FE_DROPOUT_RATE)(feat, training=training)

	    # Stage 3 (EXTRACTED_FEATURE_DIM filters): the kernel size is the literal 7
	    # and there is no pooling layer before the dropout.
	    feat = l2_conv(EXTRACTED_FEATURE_DIM, 7)(feat)
	    feat = BatchNormalization()(feat, training=training)
	    feat = Activation('relu')(feat)
	    feat = Dropout(FE_DROPOUT_RATE)(feat, training=training)

	    # Max over the remaining time axis, then restore the stroke axis.
	    pooled = GlobalMaxPooling1D()(feat)
	    return tf.reshape(pooled, [-1, stroke_shape[1], EXTRACTED_FEATURE_DIM])

	# Dynamic shapes cause the TPUEstimator export to fail, so (presumably for
	# that reason) the lookup is a one-hot matmul followed by an explicit reshape.
	def myembedding(input, num_classes, embedding_size, seq_num, name):
	  """Embed integer ids through a one-hot matrix product.

	  input: integer id tensor; every id is one-hot encoded to depth num_classes.
	  num_classes: number of rows of the embedding matrix.
	  embedding_size: number of columns of the embedding matrix.
	  seq_num: size of the middle axis of the returned tensor.
	  name: used for both the variable scope (AUTO_REUSE) and the variable itself.
	  Returns the embedded ids reshaped to (-1, seq_num, embedding_size).
	  """
	  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
	    def uniform_init():
	      # Zero-argument initializer: uniform values in [-0.05, 0.05).
	      return tf.random_uniform([num_classes, embedding_size], -0.05, 0.05)

	    table = tf.get_variable(name, initializer=uniform_init)
	    one_hot_rows = tf.reshape(tf.one_hot(input, num_classes), [-1, num_classes])
	    looked_up = tf.matmul(one_hot_rows, table)
	    return tf.reshape(looked_up, [-1, seq_num, embedding_size])

	def embed_stroke(stroke_features):
	  """Add a learned position embedding (one per stroke index) to the features.

	  stroke_features: tensor whose axis 1 enumerates strokes.
	  Returns stroke_features plus the position embedding cast to its dtype.
	  """
	  stroke_count = tf.shape(stroke_features)[1]
	  positions = tf.range(0, stroke_count, delta=1, dtype=tf.int32, name='range')
	  # Leading axis of size 1 so the embedding broadcasts over the batch.
	  positions = tf.expand_dims(positions, axis=0)
	  position_embed = myembedding(
	      positions, MAX_STROKE_NUM, EXTRACTED_FEATURE_DIM, MAX_STROKE_NUM,
	      "stroke_pos_embed")

	  return stroke_features + tf.cast(x=position_embed, dtype=stroke_features.dtype)

	def encConv1D(filternum, kernelsize, input):
	  """Run input through a new 'same'-padded, ReLU, L2-regularized Conv1D."""
	  conv = Conv1D(
	      filternum,
	      kernelsize,
	      activation='relu',
	      padding='same',
	      kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	      bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	      activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))
	  return conv(input)

	def encSelfAttenBlock(input):
	  """One encoder block: self-attention, then two pointwise convolutions.

	  Each half is followed by a residual addition and layer normalization.
	  """
	  # input is passed as both arguments, i.e. the sequence attends to itself.
	  attended = attention_context(input, input, MAX_STROKE_NUM)
	  normed = tf.contrib.layers.layer_norm(input + attended)

	  # Kernel-size-1 convolutions: widen to 2048 channels, project back to 512.
	  widened = encConv1D(2048, 1, normed)
	  projected = encConv1D(512, 1, widened)
	  return tf.contrib.layers.layer_norm(normed + projected)

	def encoder_SelfAttention(input):
	  """Project input to 512 channels, then apply six self-attention blocks."""
	  hidden = encConv1D(512, 1, input)
	  for _ in range(6):
	    hidden = encSelfAttenBlock(hidden)
	  return hidden

	def embed_decoder(decoder_input_t):
	  """Embed decoder token ids and add a learned position embedding.

	  decoder_input_t: integer token ids; axis 1 enumerates token positions.
	  Returns the token embedding plus the position embedding cast to its dtype.
	  """
	  token_embed = myembedding(
	      decoder_input_t, VOCAB_SIZE, EMBEDDING_SIZE, MAX_TOKEN_LEN, "dec_embed")

	  token_count = tf.shape(decoder_input_t)[1]
	  positions = tf.range(0, token_count, delta=1, dtype=tf.int32, name='range')
	  # Leading axis of size 1 so the embedding broadcasts over the batch.
	  positions = tf.expand_dims(positions, axis=0)
	  position_embed = myembedding(
	      positions, MAX_TOKEN_LEN, EMBEDDING_SIZE, MAX_TOKEN_LEN, "dec_pos_embed")

	  return token_embed + tf.cast(x=position_embed, dtype=token_embed.dtype)

	def attention_context(ht_enc, ht_dec, maxtklen):
	  """Additive (tanh) attention of every ht_dec position over ht_enc.

	  ht_enc: rank-3 tensor (batch, enc_len, enc_dim) that is attended over.
	  ht_dec: rank-3 tensor (batch, maxtklen, dec_dim) of query positions.
	  maxtklen: static length of axis 1 of ht_dec, used as the repeat count.
	  Returns (batch, maxtklen, enc_dim): for each query position, the
	  attention-weighted sum of ht_enc over its axis 1.
	  """
	  def l2_terms():
	    # Fresh L2 regularizers for kernel, bias and layer output.
	    return dict(
	        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))

	  # Both projections are summed below, so the two hidden sizes must match.
	  w1 = Dense(ATTENTION_ENC_HIDDEN, **l2_terms())(ht_enc)
	  w2 = Dense(ATTENTION_DEC_HIDDEN, **l2_terms())(ht_dec)

	  w2_widen = tf.expand_dims(w2, axis=1)  # (batch, 1, maxtklen, hidden)
	  w1_widen = tf.expand_dims(w1, axis=2)  # (batch, enc_len, 1, hidden)

	  w1_widen_repeat = K.repeat_elements(w1_widen, rep=maxtklen, axis=2)

	  # (batch, enc_len, maxtklen, hidden)
	  score = tf.nn.tanh(w1_widen_repeat + w2_widen)

	  # One unnormalized energy per (encoder position, query position) pair.
	  # The activity regularizer therefore acts on these pre-softmax energies.
	  energy = Dense(1, **l2_terms())(score)
	  # Normalize across the encoder axis. Using activation="softmax" on the
	  # Dense(1) layer normalizes over its size-1 last axis, which yields 1.0
	  # everywhere and reduces the context to an unweighted sum of ht_enc.
	  prob = tf.nn.softmax(energy, axis=1)

	  ht_enc_repeated = K.repeat_elements(tf.expand_dims(ht_enc, axis=2), rep=maxtklen, axis=2)
	  context_vec = tf.reduce_sum(prob * ht_enc_repeated, axis=1)

	  return context_vec

	def decoder_CnnWithAttentionBlock(dec_input, ht_enc, is_training):
	  """Causal conv over dec_input, attention over ht_enc, then a pointwise conv.

	  dec_input: tensor fed to the causal Conv1D.
	  ht_enc: encoder states handed to attention_context.
	  is_training: forwarded as training= to both SpatialDropout1D layers.
	  Returns the dropout output of the 1024-filter kernel-size-1 convolution.
	  """
	  def l2_terms():
	    # Fresh L2 regularizers for kernel, bias and layer output.
	    return dict(
	        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))

	  causal_conv = Conv1D(
	      FILTER_NUM, KERNEL_SIZE, activation='relu', padding='causal', **l2_terms())
	  conved = causal_conv(dec_input)
	  # No layer norm here: the original author notes that it would leak
	  # future information into earlier positions.
	  ht_dec = SpatialDropout1D(DROPOUT_RATE)(conved, training=is_training)

	  context_vec = attention_context(ht_enc, ht_dec, MAX_TOKEN_LEN)

	  # Join each decoder state with its context along the channel axis.
	  merged = Concatenate()([ht_dec, context_vec])
	  pointwise = Conv1D(
	      1024, 1, activation='relu', padding='causal', **l2_terms())(merged)

	  return SpatialDropout1D(DROPOUT_RATE)(pointwise, training=is_training)

	# Square root of one half.
	# NOTE(review): no use of SCALE is visible in this part of the file -- confirm
	# it is still needed.
	SCALE = math.sqrt(0.5)

	def create_model(input_stroke_t, decoder_input_t, is_training):
	  """Wire feature extractor, encoder and attention decoder into token logits.

	  input_stroke_t: stroke tensor handed to feature_extractor.
	  decoder_input_t: decoder token ids handed to embed_decoder.
	  is_training: forwarded unchanged to the sub-networks.
	  Returns the output of a TimeDistributed Dense layer with VOCAB_SIZE units.
	  """
	  def l2_terms():
	    # Fresh L2 regularizers for kernel, bias and layer output.
	    return dict(
	        kernel_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	        bias_regularizer=regularizers.l2(L2_REGULARIZATION_RATE),
	        activity_regularizer=regularizers.l2(L2_REGULARIZATION_RATE))

	  stroke_embedded = embed_stroke(feature_extractor(input_stroke_t, is_training))
	  dec_embedded = embed_decoder(decoder_input_t)

	  # NOTE(review): encoder_CNN is not defined in the visible part of this file
	  # (only encoder_SelfAttention is) -- confirm it exists elsewhere.
	  ht_enc = encoder_CNN(stroke_embedded, is_training)

	  dec_ht = decoder_CnnWithAttentionBlock(dec_embedded, ht_enc, is_training)

	  hidden = Dense(OT_HIDDEN, activation="tanh", **l2_terms())(dec_ht)
	  vocab_projection = Dense(VOCAB_SIZE, **l2_terms())
	  return TimeDistributed(vocab_projection)(hidden)