Skip to content

Instantly share code, notes, and snippets.

@devarshi16
Created July 11, 2021 11:10
Show Gist options
  • Save devarshi16/5fd9266cf1fa83143088b1f630b363d3 to your computer and use it in GitHub Desktop.
Save devarshi16/5fd9266cf1fa83143088b1f630b363d3 to your computer and use it in GitHub Desktop.
Compare the execution times of two one-hot encoding implementations: a vectorized NumPy version and a plain Python for-loop version
import numpy as np
import matplotlib.pyplot as plt
import random
import time
def one_hot(Y):
    """Return the one-hot encoding of the labels in ``Y`` without a Python loop.

    Parameters
    ----------
    Y : array_like
        Class labels, either 1-D of length ``n`` or a column vector of shape
        ``(n, 1)``. The input is flattened before encoding. Any dtype that
        ``np.unique`` can sort is accepted (negative integers, floats,
        strings, ...).

    Returns
    -------
    encoded : numpy.ndarray
        Float array of shape ``(n, k)`` where ``k`` is the number of distinct
        labels. Row ``i`` holds a single 1 in the column of ``Y[i]``'s class.
    classes : numpy.ndarray
        The sorted distinct labels as a column of shape ``(k, 1)``; entry
        ``j`` is the label represented by column ``j`` of ``encoded``.
    """
    labels = np.asarray(Y).reshape(-1)
    data_size = labels.shape[0]

    # ``return_inverse`` yields, for every label, its position in the sorted
    # unique array -- exactly the column to set. This avoids building a
    # lookup table of size max(Y) + 1, which mis-indexes negative labels,
    # rejects non-integer labels and wastes memory for large label values.
    classes, columns = np.unique(labels, return_inverse=True)

    encoded = np.zeros((data_size, classes.shape[0]))
    encoded[np.arange(data_size), columns] = 1
    return encoded, classes.reshape(-1, 1)
def one_hot_for(Y):
    """One-hot encode ``Y`` one row at a time with an explicit Python loop.

    This is the loop-based counterpart used as the baseline in the timing
    comparison.

    Parameters
    ----------
    Y : numpy.ndarray
        Array of class labels; it is iterated along its first axis.

    Returns
    -------
    encoded : numpy.ndarray
        Float array with one row per entry of ``Y`` and one column per
        distinct label; a 1 marks the class of each row.
    classes : numpy.ndarray
        The sorted distinct labels as a column of shape ``(k, 1)``.
    """
    classes = np.unique(Y).reshape(-1, 1)
    encoded = np.zeros((Y.shape[0], classes.shape[0]))
    for row_index, label in enumerate(Y):
        # Row positions in ``classes`` that equal this label give the
        # column(s) to switch on.
        matching = np.nonzero(classes == label)[0]
        encoded[row_index, matching] = 1
    return encoded, classes
# Generate a file of random integer labels in [0, 1000], one per line.
file_name = "randoms.txt"
with open(file_name, "w") as random_labels:
    for _ in range(10000):
        random_labels.write(str(random.randint(0, 1000)) + "\n")

# Read the labels back as an (n, 1) integer column vector.
with open(file_name, "r") as f:
    Y = np.asarray([int(line) for line in f]).reshape(-1, 1)

# Time both implementations on growing prefixes of Y: 100, 200, ..., 9900 rows.
one_hot_timings = []
one_hot_for_timings = []
for i in range(100, 10000, 100):
    # perf_counter() is the clock intended for measuring short durations;
    # time.time() may be too coarse to resolve these sub-millisecond calls.
    start = time.perf_counter()
    one_hot(Y[:i])
    end = time.perf_counter()
    one_hot_timings.append(end - start)

    start = time.perf_counter()
    one_hot_for(Y[:i])
    end = time.perf_counter()
    one_hot_for_timings.append(end - start)

# The x axis is the sample index: point k was measured on 100 * (k + 1) rows.
plt.plot(one_hot_timings, label="one_hot_vector")
plt.plot(one_hot_for_timings, label="one_hot_for")
plt.xlabel('data_size for every 100 datapoints')
plt.ylabel('time of execution')
plt.legend(loc='best')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment