Skip to content

Instantly share code, notes, and snippets.

@deepanshu-yadav
Created June 7, 2022 10:41
Show Gist options
  • Save deepanshu-yadav/e80d00e6218ff2bd95bf81b3947f5da4 to your computer and use it in GitHub Desktop.
Save deepanshu-yadav/e80d00e6218ff2bd95bf81b3947f5da4 to your computer and use it in GitHub Desktop.
# Build one CustomGenerator per data split. Both receive
# min_max_scaler_train, i.e. the validation generator is handed the same
# scaler object as the training generator.
train_gen, valid_gen = (
    CustomGenerator(split_descriptor, BATCH_SIZE, min_max_scaler_train)
    for split_descriptor in (train_data_descriptor, validation_data_descriptor)
)
# Size of the second axis of the first element of the first training batch;
# used further down as the feature dimension of the tf.data output signature.
input_dim = train_gen.getitem(0)[0].shape[1]
# The following two functions actually extract data on demand by using yield.
def gen_data_train():
    """Yield every training batch, repeating the full pass NO_OF_EPOCHS times.

    Batches are requested from ``train_gen`` one index at a time, on demand,
    as the consumer advances this generator.

    Yields:
        The value returned by ``train_gen.getitem(i)`` for each batch index
        ``i`` in ``range(train_gen.len)``, in order, once per epoch.
    """
    # NOTE(review): `getitem` / `len` are kept exactly as spelled in this
    # file. If CustomGenerator actually defines `__getitem__` / `__len__`,
    # this should read `train_gen[i]` / `len(train_gen)` -- confirm against
    # the class definition.
    for _ in range(NO_OF_EPOCHS):
        for batch_index in range(train_gen.len):
            yield train_gen.getitem(batch_index)
def gen_data_valid():
    """Yield every validation batch, repeating the full pass NO_OF_EPOCHS times.

    Batches are requested from ``valid_gen`` one index at a time, on demand,
    as the consumer advances this generator.

    Yields:
        The value returned by ``valid_gen.getitem(i)`` for each batch index
        ``i`` in ``range(valid_gen.len)``, in order, once per epoch.
    """
    # NOTE(review): `getitem` / `len` are kept exactly as spelled in this
    # file. If CustomGenerator actually defines `__getitem__` / `__len__`,
    # this should read `valid_gen[i]` / `len(valid_gen)` -- confirm against
    # the class definition.
    for _ in range(NO_OF_EPOCHS):
        for batch_index in range(valid_gen.len):
            yield valid_gen.getitem(batch_index)
# We feed these generators to tf.data.Dataset.from_generator. Note that it is
# important to specify the output signature.
# For an autoencoder, each element is a tuple of NumPy arrays, where
# each array has shape (None, NO_OF_FEATURES).
# The first dimension is None because the number of rows picked for a batch
# during training is not known in advance,
# so it is safer to leave it unspecified.
# Per the tf.data.Dataset.from_generator documentation, the structure of the
# yielded elements is declared through output_signature: here a 2-tuple of
# float32 tensors, each of shape (None, input_dim).
dataset_train = tf.data.Dataset.from_generator(
    gen_data_train,
    output_signature=tuple(
        tf.TensorSpec(shape=(None, input_dim), dtype=tf.float32)
        for _ in range(2)
    ),
)
# Per the tf.data.Dataset.from_generator documentation, the structure of the
# yielded elements is declared through output_signature: here a 2-tuple of
# float32 tensors, each of shape (None, input_dim).
dataset_valid = tf.data.Dataset.from_generator(
    gen_data_valid,
    output_signature=tuple(
        tf.TensorSpec(shape=(None, input_dim), dtype=tf.float32)
        for _ in range(2)
    ),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment