Loading pre-trained vectors into Keras models
# The first step is to load the pre-trained vectors into Python. The example
# below uses GloVe data.
import os
import numpy as np

GLOVE_DIR = "/path/to/pretrained/embeddings/glove.6B/"
embeddings_index = {}
f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'), "r", encoding='utf-8')
for line in f:
    values = line.split()
    word = values[0]  # the first token on each line is the word itself
    coefs = np.asarray(values[1:], dtype='float32')  # the rest is its vector
    embeddings_index[word] = coefs
f.close()
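
# A minimal sketch (an assumption, not part of the original gist) of where the
# `word_index` used below comes from: a Keras Tokenizer builds it from raw
# text. `texts` here is a hypothetical toy corpus.
from keras.preprocessing.text import Tokenizer

texts = ["the quick brown fox", "jumped over the lazy dog"]  # hypothetical
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index  # dict mapping word -> integer index (1-based)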
EMBEDDING_DIM = 100
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    # words not found in the embedding index keep their random initialization
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
# The second step is to load this data into a Keras Embedding layer.
from keras.layers import Embedding

MAX_SEQUENCE_LENGTH = 1000
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)  # freeze the pre-trained weights
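
# A hedged usage sketch (an assumption, not part of the original gist): pad the
# integer sequences to MAX_SEQUENCE_LENGTH and feed them through the frozen
# embedding layer in a small Sequential model. The pooling/Dense head is an
# illustrative choice, not the gist author's architecture.
from keras.models import Sequential
from keras.layers import GlobalAveragePooling1D, Dense
from keras.preprocessing.sequence import pad_sequences

sequences = tokenizer.texts_to_sequences(texts)  # reuses the toy tokenizer above
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

model = Sequential([
    embedding_layer,
    GlobalAveragePooling1D(),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()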