Last active
December 13, 2021 07:19
-
-
Save vishalsingha/ea8c7256db99bb6b8549cc42d6d73cb2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_prediction(X, clf_path, class_encoding_path, std_path, col_seq_path, good_features_path):
    """Build the engineered feature row for one sample and classify it.

    Args:
        X: One sample exposing ``.values`` and ``.index`` (e.g. a pandas
            Series). The index labels become the column names of a one-row
            float DataFrame; the caller's object is not modified.
        clf_path: Path to a pickled object exposing ``predict``.
        class_encoding_path: Path to a pickle that is loaded but not used
            by this function.
        std_path: Path to a pickled object exposing ``transform``.
        col_seq_path: Path to a pickle that is loaded but not used by this
            function.
        good_features_path: Path to a pickled column selection applied to
            the transformed frame before prediction.

    Returns:
        A 3-tuple ``(pred, label, features)``: the raw result of
        ``clf.predict``, the entry of the kingdom list indexed by
        ``pred[0]``, and the selected feature row as a plain list.

    Raises:
        OSError: If any of the five artifact files cannot be opened.

    SECURITY: ``pickle.load`` can execute arbitrary code while loading.
    Only pass paths to artifacts that come from a trusted source.
    """
    kingdom_classes = ['Archea', 'Bacteria', 'Eukaryots']

    def _load_pickle(path):
        # The context manager closes the file; no explicit close() is needed.
        with open(path, 'rb') as file:
            return pickle.load(file)

    def _add_value(total, row, col, f):
        # Add one cell to the running total; a cell that cannot be converted
        # to float is reported and skipped (best-effort, as before).
        try:
            return total + float(row[col])
        except (TypeError, ValueError):
            print(f'There has been a error while calculating {f}')
            return total

    def get_XX_feature(val, f):
        # Sum the cells whose column name starts with the pair `f`, or whose
        # first and last characters both equal the first character of `f`.
        # NOTE(review): the original also compared `col[-1:-3]` with `f`.
        # That slice is always empty, so it never matched a two-letter pair.
        # A suffix match (`col[-2:]`) was probably intended, but enabling it
        # would change the feature values - presumably the persisted scaler
        # and classifier were fitted with this same logic; confirm before
        # changing it.
        row = val.iloc[0]
        total = 0
        for col in val.columns:
            if col[0:2] == f or (col[0] == f[0] and col[-1] == f[0]):
                total = _add_value(total, row, col, f)
        return total

    def get_X_feature(val, f):
        # Sum the cells of every three-character column whose name contains `f`.
        row = val.iloc[0]
        total = 0
        for col in val.columns:
            if f in col and len(col) == 3:
                total = _add_value(total, row, col, f)
        return total

    # Fresh one-row frame, so the caller's object is never mutated.
    X = pd.DataFrame(X.values.reshape(1, -1), columns=X.index, dtype=float)

    # Artifacts are read in the original order. The class encoding and the
    # column sequence are not used below; they are still loaded so that a
    # missing or unreadable artifact fails exactly as it did before.
    _load_pickle(class_encoding_path)
    std_ = _load_pickle(std_path)
    clf = _load_pickle(clf_path)
    _load_pickle(col_seq_path)
    good_features = _load_pickle(good_features_path)

    # Every engineered value is computed from the raw columns only, i.e.
    # before any engineered column is appended. Insertion order is the
    # column order handed to the scaler, so it must not be rearranged.
    engineered = {
        'kurt': X.kurtosis(axis=1).values[0],
        'median': X.median(axis=1).values[0],
        'mode': X.mode(axis=1).values[0][0],
        'var': X.var(axis=1).values[0],
        'max': X.max(axis=1).values[0],
        'min': X.min(axis=1).values[0],
        'q1': X.quantile(0.25, axis=1).values[0],
        'q2': X.quantile(0.50, axis=1).values[0],
        'q3': X.quantile(0.75, axis=1).values[0],
        'std': X.std(axis=1).values[0],
        'sum': X.sum(axis=1).values[0],
        'UU': get_XX_feature(X, 'UU'),
        'AA': get_XX_feature(X, 'AA'),
        'CC': get_XX_feature(X, 'CC'),
        'GG': get_XX_feature(X, 'GG'),
        'sum_g': get_X_feature(X, 'G'),
        'sum_a': get_X_feature(X, 'A'),
        'sum_c': get_X_feature(X, 'C'),
        'sum_u': get_X_feature(X, 'U'),
    }
    for name, value in engineered.items():
        X[name] = value

    # Transform, keep the column labels, then select the model's features.
    X = pd.DataFrame(std_.transform(X), columns=X.columns)
    X = X[good_features]

    pred = clf.predict(X)
    return pred, kingdom_classes[pred[0]], X.values.tolist()[0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment