Last active
July 25, 2022 20:51
-
-
Save jiahao87/e7d9ede444a41161879d7b4845f0a6c0 to your computer and use it in GitHub Desktop.
Full sample code for MLflow example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
from scipy.stats import uniform | |
from sklearn.datasets import load_iris | |
from sklearn.model_selection import train_test_split | |
from sklearn.model_selection import cross_validate | |
from sklearn import metrics | |
from sklearn.model_selection import ParameterSampler | |
from sklearn.ensemble import RandomForestClassifier | |
import mlflow | |
import mlflow.sklearn | |
####################### Amend configurations here ######################## | |
# Credentials | |
GOOGLE_APPLICATION_CREDENTIALS = <GOOGLE_APPLICATION_CREDENTIALS> # path to service account json file | |
MLFLOW_TRACKING_USERNAME = <MLFLOW_TRACKING_USERNAME> # username | |
MLFLOW_TRACKING_PASSWORD = <MLFLOW_TRACKING_PASSWORD> # password | |
experiment_name = "Experiment 1" # amend experiment name accordingly | |
tracking_uri = './mlruns' # Or external IP e.g., "http://35.225.50.9:80" | |
# Hyperparameters distribution for our model | |
hyperparams = {'max_depth':range(5,21), | |
'max_samples':uniform(loc=0.5, scale=0.5), | |
'max_features': [None, 'sqrt', 'log2']} | |
# Other fixed parameters | |
params = {'cv_folds':3, | |
'n_iter':6} | |
# Metrics to score and log | |
metrics = ['accuracy', 'f1_macro'] # run sorted(metrics.SCORERS.keys()) to see list of metrics avaliable | |
######################################################################## | |
# Set environment variables | |
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_APPLICATION_CREDENTIALS | |
os.environ['MLFLOW_TRACKING_USERNAME'] = MLFLOW_TRACKING_USERNAME | |
os.environ['MLFLOW_TRACKING_PASSWORD'] = MLFLOW_TRACKING_PASSWORD | |
def data_processing(test_size=0.2, random_state=0):
    """Load the iris dataset and split it into train/test sets.

    Parameters
    ----------
    test_size : float, default 0.2
        Fraction of the samples held out for the test split.
    random_state : int, default 0
        Seed for the shuffle inside ``train_test_split``; fixing it keeps
        the split reproducible across runs.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` NumPy arrays.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    return X_train, X_test, y_train, y_test
# Build the train/test split once; every MLflow run below reuses the same data.
X_train, X_test, y_train, y_test = data_processing()

# Point MLflow at the tracking store BEFORE creating/resolving experiments.
# In the original order, set_experiment ran against the default local store
# and only afterwards was the tracking URI switched, so the experiment could
# be created in (or fetched from) the wrong backend.
mlflow.set_tracking_uri(tracking_uri)

# Create the experiment if needed and fetch its id for start_run below.
mlflow.set_experiment(experiment_name)
experiment = mlflow.get_experiment_by_name(experiment_name)

# Draw n_iter concrete hyperparameter combinations from the distributions.
param_list = list(ParameterSampler(hyperparams,
                                   n_iter=params['n_iter'],
                                   random_state=0))
# One MLflow run per sampled hyperparameter combination: train, cross-validate,
# then log the parameters, averaged CV metrics, and the fitted model.
for run_idx in range(params['n_iter']):
    run_hyperparams = param_list[run_idx]
    with mlflow.start_run(experiment_id=experiment.experiment_id):
        clf = RandomForestClassifier(
            max_depth=run_hyperparams['max_depth'],
            max_samples=run_hyperparams['max_samples'],
            max_features=run_hyperparams['max_features'],
            random_state=0)
        # Fit on the full training split so the logged model is usable as-is;
        # cross_validate clones the estimator internally, leaving this fit intact.
        clf.fit(X_train, y_train)
        scores = cross_validate(clf, X_train, y_train,
                                cv=params['cv_folds'],
                                scoring=metrics)
        # Average each requested metric over the CV folds.
        metrics_dict = {m: np.mean(scores['test_' + m]) for m in metrics}
        # log model params
        mlflow.log_params(run_hyperparams)
        # log model metrics (the original comment mislabelled this as params)
        mlflow.log_metrics(metrics_dict)
        # log model
        mlflow.sklearn.log_model(clf, "model")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment