Last active
July 25, 2022 20:51
-
-
Save jiahao87/e7d9ede444a41161879d7b4845f0a6c0 to your computer and use it in GitHub Desktop.
Full sample code for MLflow example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
from scipy.stats import uniform | |
from sklearn.datasets import load_iris | |
from sklearn.model_selection import train_test_split | |
from sklearn.model_selection import cross_validate | |
from sklearn import metrics | |
from sklearn.model_selection import ParameterSampler | |
from sklearn.ensemble import RandomForestClassifier | |
import mlflow | |
import mlflow.sklearn | |
####################### Amend configurations here ######################## | |
# Credentials | |
GOOGLE_APPLICATION_CREDENTIALS = <GOOGLE_APPLICATION_CREDENTIALS> # path to service account json file | |
MLFLOW_TRACKING_USERNAME = <MLFLOW_TRACKING_USERNAME> # username | |
MLFLOW_TRACKING_PASSWORD = <MLFLOW_TRACKING_PASSWORD> # password | |
experiment_name = "Experiment 1" # amend experiment name accordingly | |
tracking_uri = './mlruns' # Or external IP e.g., "http://35.225.50.9:80" | |
# Hyperparameters distribution for our model | |
hyperparams = {'max_depth':range(5,21), | |
'max_samples':uniform(loc=0.5, scale=0.5), | |
'max_features': [None, 'sqrt', 'log2']} | |
# Other fixed parameters | |
params = {'cv_folds':3, | |
'n_iter':6} | |
# Metrics to score and log | |
metrics = ['accuracy', 'f1_macro'] # run sorted(metrics.SCORERS.keys()) to see list of metrics avaliable | |
######################################################################## | |
# Set environment variables | |
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_APPLICATION_CREDENTIALS | |
os.environ['MLFLOW_TRACKING_USERNAME'] = MLFLOW_TRACKING_USERNAME | |
os.environ['MLFLOW_TRACKING_PASSWORD'] = MLFLOW_TRACKING_PASSWORD | |
def data_processing(test_size=0.2, random_state=0):
    """Load the iris dataset and split it into train/test sets.

    Parameters
    ----------
    test_size : float, default 0.2
        Fraction of the samples held out for the test split.
    random_state : int, default 0
        Seed for the shuffle inside ``train_test_split``; fixing it keeps
        the split reproducible across runs.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` NumPy arrays.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    return X_train, X_test, y_train, y_test
# Build the train/test split once; every MLflow run below reuses the same data.
X_train, X_test, y_train, y_test = data_processing()

# Point MLflow at the tracking store BEFORE creating/resolving experiments.
# In the original order, set_experiment ran against the default local store
# and only afterwards was the tracking URI switched, so the experiment could
# be created in (or fetched from) the wrong backend.
mlflow.set_tracking_uri(tracking_uri)

# Create the experiment if needed and fetch its id for start_run below.
mlflow.set_experiment(experiment_name)
experiment = mlflow.get_experiment_by_name(experiment_name)

# Draw n_iter concrete hyperparameter combinations from the distributions.
param_list = list(ParameterSampler(hyperparams,
                                   n_iter=params['n_iter'],
                                   random_state=0))
# One MLflow run per sampled hyperparameter combination: train, cross-validate,
# then log the parameters, averaged CV metrics, and the fitted model.
for run_idx in range(params['n_iter']):
    run_hyperparams = param_list[run_idx]
    with mlflow.start_run(experiment_id=experiment.experiment_id):
        clf = RandomForestClassifier(
            max_depth=run_hyperparams['max_depth'],
            max_samples=run_hyperparams['max_samples'],
            max_features=run_hyperparams['max_features'],
            random_state=0)
        # Fit on the full training split so the logged model is usable as-is;
        # cross_validate clones the estimator internally, leaving this fit intact.
        clf.fit(X_train, y_train)
        scores = cross_validate(clf, X_train, y_train,
                                cv=params['cv_folds'],
                                scoring=metrics)
        # Average each requested metric over the CV folds.
        metrics_dict = {m: np.mean(scores['test_' + m]) for m in metrics}
        # log model params
        mlflow.log_params(run_hyperparams)
        # log model metrics (the original comment mislabelled this as params)
        mlflow.log_metrics(metrics_dict)
        # log model
        mlflow.sklearn.log_model(clf, "model")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment