vishalsingha · December 13, 2021 07:19
diff --git a/codon_usage_deploy1.py b/codon_usage_deploy1.py
 def make_prediction(X, clf_path, class_encoding_path, std_path, col_seq_path, good_features_path):
     """Classify one codon-usage sample and return the prediction details.

     The sample is reshaped into a single-row DataFrame, extended with
     row-wise summary statistics and codon-group sums, passed through the
     pickled transformer, reduced to the pickled feature selection and
     finally handed to the pickled classifier.

     Args:
         X: pandas Series (anything exposing ``.values`` and ``.index``);
             the index supplies the column names and the values are cast
             to float.
         clf_path: Path to a pickled object exposing ``predict``.
         class_encoding_path: Not read. The object previously loaded from
             it was never used, so the load was dropped; the parameter
             stays so existing call sites keep working.
         std_path: Path to a pickled object exposing ``transform``.
         col_seq_path: Not read, for the same reason as
             ``class_encoding_path``.
         good_features_path: Path to a pickled column selector
             (presumably a list of column names) applied after the
             transform.

     Returns:
         A tuple ``(pred, label, features)``: the raw ``predict`` output,
         the entry of ``kingdom_classes`` at index ``pred[0]``, and the
         transformed, selected feature row as a plain list.
     """
     kingdom_classes = ['Archea', 'Bacteria', 'Eukaryots']

     def load_pickle(path):
         """Unpickle and return the object stored at ``path``."""
         # NOTE(security): pickle.load can execute arbitrary code; only
         # point these paths at trusted artifacts.
         with open(path, 'rb') as handle:
             return pickle.load(handle)

     def get_XX_feature(val, f):
         """Sum row-0 values of columns that start with the pair ``f`` or
         that begin and end with the first letter of ``f``."""
         # NOTE(review): the test used to include ``col[-1:-3] == f``. That
         # slice is always empty, so it never matched and is dropped here
         # without changing results. If ``col[-2:]`` (columns ending with
         # the pair) was intended, switching to it would change the values
         # fed to the already-fitted transformer and classifier -- confirm
         # against the training code first.
         row = val.iloc[0]
         temp = 0
         for col in val.columns:
             if col[0:2] == f or (col[0] == f[0] and col[-1] == f[0]):
                 try:
                     temp = temp + float(row[col])
                 except (KeyError, TypeError, ValueError):
                     print(f'There has been a error while calculating {f}')
         return temp

     def get_X_feature(val, f):
         """Sum row-0 values of three-character columns containing ``f``."""
         row = val.iloc[0]
         temp = 0
         for col in val.columns:
             if f in col and len(col) == 3:
                 try:
                     temp = temp + float(row[col])
                 except (KeyError, TypeError, ValueError):
                     print(f'There has been a error while calculating {f}')
         return temp

     std_ = load_pickle(std_path)
     clf = load_pickle(clf_path)
     good_features = load_pickle(good_features_path)

     X = pd.DataFrame(X.values.reshape(1, -1), columns=X.index, dtype=float)

     # Every value is computed from the raw columns before any engineered
     # column is attached, so the statistics never see each other.
     engineered = [
         ('kurt', X.kurtosis(axis=1).values[0]),
         ('median', X.median(axis=1).values[0]),
         ('mode', X.mode(axis=1).values[0][0]),
         ('var', X.var(axis=1).values[0]),
         ('max', X.max(axis=1).values[0]),
         ('min', X.min(axis=1).values[0]),
         ('q1', X.quantile(0.25, axis=1).values[0]),
         ('q2', X.quantile(0.50, axis=1).values[0]),
         ('q3', X.quantile(0.75, axis=1).values[0]),
         ('std', X.std(axis=1).values[0]),
         ('sum', X.sum(axis=1).values[0]),
         ('UU', get_XX_feature(X, 'UU')),
         ('AA', get_XX_feature(X, 'AA')),
         ('CC', get_XX_feature(X, 'CC')),
         ('GG', get_XX_feature(X, 'GG')),
         ('sum_g', get_X_feature(X, 'G')),
         ('sum_a', get_X_feature(X, 'A')),
         ('sum_c', get_X_feature(X, 'C')),
         ('sum_u', get_X_feature(X, 'U')),
     ]
     # Columns are appended in this exact order; the transformer receives
     # them positionally, so the order must not change.
     for name, value in engineered:
         X[name] = value

     # NOTE(review): a column sequence used to be loaded from col_seq_path
     # but was never applied; if it was meant to reorder the columns before
     # the transform, that step is still missing -- confirm.
     X = pd.DataFrame(std_.transform(X), columns=X.columns)

     X = X[good_features]
     pred = clf.predict(X)

     # pred[0] is used directly as an index into kingdom_classes.
     return pred, kingdom_classes[pred[0]], X.values.tolist()[0]
	def make_prediction(X, clf_path, class_encoding_path, std_path, col_seq_path, good_features_path):
	    """Run the pickled transform/classifier pipeline on one sample.

	    Steps: reshape ``X`` into a one-row float DataFrame, append row-wise
	    statistics and codon-group sums, apply the pickled transformer, keep
	    the pickled feature selection, and call the pickled classifier.

	    Args:
	        X: pandas Series (anything exposing ``.values`` and ``.index``);
	            index entries become column names.
	        clf_path: Path to a pickled object with a ``predict`` method.
	        class_encoding_path: Accepted for backward compatibility only;
	            the object it pointed to was loaded but never used, so it is
	            no longer opened.
	        std_path: Path to a pickled object with a ``transform`` method.
	        col_seq_path: Accepted for backward compatibility only; no
	            longer opened (its content was never used).
	        good_features_path: Path to a pickled column selector
	            (presumably a list of column names).

	    Returns:
	        ``(pred, label, features)`` where ``pred`` is the classifier
	        output, ``label`` is ``kingdom_classes[pred[0]]`` and
	        ``features`` is the first row of the selected, transformed frame
	        as a list.
	    """
	    kingdom_classes = ['Archea', 'Bacteria', 'Eukaryots']

	    # NOTE(security): pickle.load can execute arbitrary code; these paths
	    # must reference trusted files only.
	    loaded = []
	    for path in (std_path, clf_path, good_features_path):
	        with open(path, 'rb') as file:
	            loaded.append(pickle.load(file))
	    std_, clf, good_features = loaded

	    X = pd.DataFrame(X.values.reshape(1, -1), columns=X.index, dtype=float)
	    sample = X.iloc[0]
	    raw_columns = list(X.columns)

	    def is_pair_column(col, f):
	        # True when the column starts with the pair, or starts and ends
	        # with the pair's first letter.
	        # NOTE(review): an extra ``col[-1:-3] == f`` test was removed: the
	        # slice is always empty, so it could never match. If ``col[-2:]``
	        # was intended, adopting it would change the values passed to the
	        # fitted transformer/classifier -- verify with the training code.
	        return col[0:2] == f or (col[0] == f[0] and col[-1] == f[0])

	    def has_base(col, f):
	        # True for three-character columns that contain the letter.
	        return f in col and len(col) == 3

	    def total_where(matches, f):
	        # Explicit left-to-right accumulation starting from int 0, so the
	        # result (including the no-match case) stays exactly as before.
	        temp = 0
	        for col in raw_columns:
	            if matches(col, f):
	                try:
	                    temp = temp + float(sample[col])
	                except (KeyError, TypeError, ValueError):
	                    print(f'There has been a error while calculating {f}')
	        return temp

	    # Computed entirely from the raw columns; insertion order of this dict
	    # is the order in which columns are appended below.
	    features = {
	        'kurt': X.kurtosis(axis=1).values[0],
	        'median': X.median(axis=1).values[0],
	        'mode': X.mode(axis=1).values[0][0],
	        'var': X.var(axis=1).values[0],
	        'max': X.max(axis=1).values[0],
	        'min': X.min(axis=1).values[0],
	        'q1': X.quantile(0.25, axis=1).values[0],
	        'q2': X.quantile(0.50, axis=1).values[0],
	        'q3': X.quantile(0.75, axis=1).values[0],
	        'std': X.std(axis=1).values[0],
	        'sum': X.sum(axis=1).values[0],
	    }
	    for pair in ('UU', 'AA', 'CC', 'GG'):
	        features[pair] = total_where(is_pair_column, pair)
	    for key, base in (('sum_g', 'G'), ('sum_a', 'A'), ('sum_c', 'C'), ('sum_u', 'U')):
	        features[key] = total_where(has_base, base)

	    for key, value in features.items():
	        X[key] = value

	    # NOTE(review): the column sequence from col_seq_path was never
	    # applied in the earlier code either; if the transformer needs a fixed
	    # column order, that reordering is still missing -- confirm.
	    X = pd.DataFrame(std_.transform(X), columns=X.columns)

	    X = X[good_features]
	    pred = clf.predict(X)

	    return pred, kingdom_classes[pred[0]], X.values.tolist()[0]