Skip to content

Instantly share code, notes, and snippets.

@vishalsingha
Last active December 13, 2021 07:19
Show Gist options
  • Save vishalsingha/ea8c7256db99bb6b8549cc42d6d73cb2 to your computer and use it in GitHub Desktop.
Save vishalsingha/ea8c7256db99bb6b8549cc42d6d73cb2 to your computer and use it in GitHub Desktop.
def _load_pickle(path):
    """Unpickle and return the single object stored in the file at *path*."""
    # NOTE(security): unpickling can execute arbitrary code. Only point this at
    # artefacts that come from a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)


def _sum_paired_columns(val, f):
    """Sum row 0 of *val* over the columns matched by the two-letter key *f*.

    A column matches when its first two characters equal *f*, or when both its
    first and its last character equal ``f[0]``.

    NOTE(review): the original condition had a third clause,
    ``col[-1:-3] == f``. That slice is always empty, so the clause could never
    be true and has been dropped without changing the result. It looks like
    ``col[-2:]`` (columns *ending* in *f*) was intended, but the pickled scaler
    and classifier were presumably fitted on features computed exactly this
    way, so enabling it here alone would skew inference against training.
    Confirm against the training code before changing it.
    """
    row = val.iloc[0]
    total = 0
    for col in val.columns:
        if col[0:2] == f or (col[0] == f[0] and col[-1] == f[0]):
            try:
                total = total + float(row[col])
            except (TypeError, ValueError):
                # Best effort: report the unusable cell and keep summing.
                print(f'There has been a error while calculating {f}')
    return total


def _sum_columns_containing(val, f):
    """Sum row 0 of *val* over the three-character columns that contain *f*."""
    row = val.iloc[0]
    total = 0
    for col in val.columns:
        if f in col and len(col) == 3:
            try:
                total = total + float(row[col])
            except (TypeError, ValueError):
                # Best effort: report the unusable cell and keep summing.
                print(f'There has been a error while calculating {f}')
    return total


def make_prediction(X, clf_path, class_encoding_path, std_path, col_seq_path, good_features_path):
    """Engineer features for one sample, scale them and classify the sample.

    Args:
        X: a single sample exposing ``.values`` and ``.index`` (e.g. a pandas
            Series); the index entries become the column names.
        clf_path: pickle file holding an object with a ``predict`` method.
        class_encoding_path: pickle file; loaded but its content is not used.
        std_path: pickle file holding an object with a ``transform`` method.
        col_seq_path: pickle file; loaded but its content is not used.
        good_features_path: pickle file holding the column labels that are
            passed on to the classifier.

    Returns:
        A tuple ``(pred, label, features)``: the raw output of
        ``clf.predict``, the entry of the hard-coded kingdom list selected by
        ``pred[0]``, and the scaled, selected feature values as a flat list.

    Raises:
        OSError / pickle.UnpicklingError: if any of the five files cannot be
            opened or unpickled.
    """
    # pred[0] is used directly as an index into this list, so the classifier
    # is assumed to emit integer codes 0..2 in this order -- TODO confirm.
    kingdom_classes = ['Archea', 'Bacteria', 'Eukaryots']

    # Reshape the sample into a one-row float frame (fresh object, so the
    # caller's input is never modified).
    X = pd.DataFrame(X.values.reshape(1, -1), columns=X.index, dtype=float)

    # The class encoding and the column sequence are still unpickled so that a
    # missing or corrupt artefact fails exactly as before, but neither value is
    # consumed below.
    # NOTE(review): col_seq is presumably the training column order and may
    # need to be applied before scaling -- confirm against the training code.
    _load_pickle(class_encoding_path)
    std_ = _load_pickle(std_path)
    clf = _load_pickle(clf_path)
    _load_pickle(col_seq_path)
    good_features = _load_pickle(good_features_path)

    # Every statistic is computed from the raw columns only: the dict is built
    # in full before any engineered column is attached to the frame. Insertion
    # order fixes the order in which the new columns are appended.
    engineered = {
        'kurt': X.kurtosis(axis=1).values[0],
        'median': X.median(axis=1).values[0],
        'mode': X.mode(axis=1).values[0][0],
        'var': X.var(axis=1).values[0],
        'max': X.max(axis=1).values[0],
        'min': X.min(axis=1).values[0],
        'q1': X.quantile(0.25, axis=1).values[0],
        'q2': X.quantile(0.50, axis=1).values[0],
        'q3': X.quantile(0.75, axis=1).values[0],
        'std': X.std(axis=1).values[0],
        'sum': X.sum(axis=1).values[0],
        'UU': _sum_paired_columns(X, 'UU'),
        'AA': _sum_paired_columns(X, 'AA'),
        'CC': _sum_paired_columns(X, 'CC'),
        'GG': _sum_paired_columns(X, 'GG'),
        'sum_g': _sum_columns_containing(X, 'G'),
        'sum_a': _sum_columns_containing(X, 'A'),
        'sum_c': _sum_columns_containing(X, 'C'),
        'sum_u': _sum_columns_containing(X, 'U'),
    }
    for name, value in engineered.items():
        X[name] = value

    # Scale all columns, then keep only the ones the classifier consumes.
    X = pd.DataFrame(std_.transform(X), columns=X.columns)
    X = X[good_features]

    pred = clf.predict(X)
    return pred, kingdom_classes[pred[0]], X.values.tolist()[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment