sandeepnmenon · November 29, 2022 03:43
diff --git a/optimus.py b/optimus.py
 from scipy.optimize import nnls
 from scipy.optimize import curve_fit
 import math

 def optimus_fitting(df):
     """
     Fit the loss model l = 1/(b0*k + b1) + b2 to one training trace.

     l is the training loss (column 'Train_Loss')
     k is the number of iterations (column 'Epoch_no')

     df is a DataFrame holding those two columns; rows containing NaN are
     dropped before fitting. b0, b1 and b2 are estimated with scipy's
     curve_fit, each constrained to the range [0, inf).

     Returns the fitted values (b0, b1, b2), i.e. the first element of
     curve_fit's result.
     """
     clean = df.dropna()
     losses = clean['Train_Loss']
     iterations = clean['Epoch_no']

     # Every coefficient is kept non-negative.
     lower_limits = [0, 0, 0]
     upper_limits = [np.inf, np.inf, np.inf]

     # An earlier, now-disabled variant passed per-point sigma values
     # (shrinking 4x per third of the data) to curve_fit; the fit below
     # passes no sigma.
     fitted, _ = curve_fit(
         optimus_predict,
         iterations,
         losses,
         bounds=(lower_limits, upper_limits),
     )
     return fitted
    

 def optimus_predict(k, b0, b1, b2):
     """
     Evaluate the loss model l = 1/(b0*k + b1) + b2.

     k is the number of iterations
     b0, b1 and b2 are the model coefficients
     Returns l, the predicted training loss.
     """
     denominator = b0 * k + b1
     reciprocal = 1 / denominator
     return reciprocal + b2
    
    
   # Plot training loss data points and the fitted curve for each model
 import matplotlib.pyplot as plt
 #import seaborn as sns
 import os
 import numpy as np
 import pandas as pd
 data_dir ="./p4_experiment_data"  # folder searched for "<model>_<gpu>.csv" logs
 #sns.set()
 # [first, last] epoch of each of the three segments that are fitted separately.
 epoch_steps = [[1,82], [83,123], [124,350]]
 for model in resnet_models:
     for gpu in gpu_models:
         csv_file = os.path.join(data_dir, model + "_" + gpu + ".csv")
         # Model/GPU combinations without a CSV file are skipped.
         if not os.path.isfile(csv_file):
             continue
         df = pd.read_csv(csv_file)
         df = df.dropna()
         # Split the data into 3 segments based on column Epoch_no.
         # The first segment has no lower bound and the last one no upper bound.
         df1 = df.loc[df['Epoch_no'] <= epoch_steps[0][1]]
         df2 = df.loc[(df['Epoch_no'] >= epoch_steps[1][0]) & (df['Epoch_no'] <= epoch_steps[1][1])]
         df3 = df.loc[df['Epoch_no'] >= epoch_steps[2][0]]
         # Fit the model for each segment.
         # NOTE(review): a log with no rows in one of the segments is expected
         # to make the fit raise -- confirm every CSV covers all three ranges.
         b0, b1, b2 = optimus_fitting(df1)
         b0_2, b1_2, b2_2 = optimus_fitting(df2)
         b0_3, b1_3, b2_3 = optimus_fitting(df3)
         # Plot the data points and the fitted curve for each segment.
         # Each data series gets its own legend label and the three red fitted
         # curves share a single entry, so the legend has no repeated names.
         plt.figure(figsize=(10, 6))
         plt.plot(df1['Epoch_no'], df1['Train_Loss'], 'o',
                  label=f"Data (epochs <= {epoch_steps[0][1]})")
         plt.plot(df1['Epoch_no'], optimus_predict(df1['Epoch_no'], b0, b1, b2), 'r-', label='Fitted Curve')
         plt.plot(df2['Epoch_no'], df2['Train_Loss'], 'o',
                  label=f"Data (epochs {epoch_steps[1][0]}-{epoch_steps[1][1]})")
         plt.plot(df2['Epoch_no'], optimus_predict(df2['Epoch_no'], b0_2, b1_2, b2_2), 'r-')
         plt.plot(df3['Epoch_no'], df3['Train_Loss'], 'o',
                  label=f"Data (epochs >= {epoch_steps[2][0]})")
         plt.plot(df3['Epoch_no'], optimus_predict(df3['Epoch_no'], b0_3, b1_3, b2_3), 'r-')
         plt.xlabel('Epoch')
         plt.ylabel('Training Loss')
         plt.title(f"Training Loss vs Epoch for {model} on {gpu}")
         plt.legend()
         plt.show()
	from scipy.optimize import nnls
	from scipy.optimize import curve_fit
	import math

	def optimus_fitting(df):
	    """
	    Fit the loss model l = 1/(b0*k + b1) + b2 to one training trace.

	    l is the training loss (column 'Train_Loss')
	    k is the number of iterations (column 'Epoch_no')

	    df is a DataFrame holding those two columns; rows containing NaN are
	    dropped before fitting. b0, b1 and b2 are estimated with scipy's
	    curve_fit, each constrained to the range [0, inf).

	    Returns the fitted values (b0, b1, b2), i.e. the first element of
	    curve_fit's result.
	    """
	    df = df.dropna()
	    train_loss = df['Train_Loss']
	    epoch_num = df['Epoch_no']
	    # Lower and upper limits for (b0, b1, b2): all three stay non-negative.
	    param_bounds = ([0, 0, 0], [np.inf, np.inf, np.inf])
	    # An earlier, now-disabled variant passed per-point sigma values
	    # (shrinking 4x per third of the data) to curve_fit; the fit below
	    # passes no sigma.
	    params = curve_fit(optimus_predict, epoch_num, train_loss, bounds=param_bounds)

	    # curve_fit returns (fitted parameters, covariance); only the former is used.
	    return params[0]


	def optimus_predict(k, b0, b1, b2):
	    """
	    Evaluate the loss model l = 1/(b0*k + b1) + b2.

	    k is the number of iterations
	    b0, b1 and b2 are the model coefficients
	    Returns l, the predicted training loss.
	    """
	    return 1/(b0*k + b1) + b2


	# Plot training loss data points and the fitted curve for each model
	import matplotlib.pyplot as plt
	#import seaborn as sns
	import os
	import numpy as np
	import pandas as pd
	data_dir ="./p4_experiment_data"  # folder searched for "<model>_<gpu>.csv" logs
	#sns.set()
	# [first, last] epoch of each of the three segments that are fitted separately.
	epoch_steps = [[1,82], [83,123], [124,350]]
	for model in resnet_models:
	    for gpu in gpu_models:
	        csv_file = os.path.join(data_dir, model + "_" + gpu + ".csv")
	        # Model/GPU combinations without a CSV file are skipped.
	        if not os.path.isfile(csv_file):
	            continue
	        df = pd.read_csv(csv_file)
	        df = df.dropna()
	        # Split the data into 3 segments based on column Epoch_no.
	        # The first segment has no lower bound and the last one no upper bound.
	        df1 = df.loc[df['Epoch_no'] <= epoch_steps[0][1]]
	        df2 = df.loc[(df['Epoch_no'] >= epoch_steps[1][0]) & (df['Epoch_no'] <= epoch_steps[1][1])]
	        df3 = df.loc[df['Epoch_no'] >= epoch_steps[2][0]]
	        # Fit the model for each segment.
	        # NOTE(review): a log with no rows in one of the segments is expected
	        # to make the fit raise -- confirm every CSV covers all three ranges.
	        b0, b1, b2 = optimus_fitting(df1)
	        b0_2, b1_2, b2_2 = optimus_fitting(df2)
	        b0_3, b1_3, b2_3 = optimus_fitting(df3)
	        # Plot the data points and the fitted curve for each segment.
	        # Each data series gets its own legend label and the three red fitted
	        # curves share a single entry, so the legend has no repeated names.
	        plt.figure(figsize=(10, 6))
	        plt.plot(df1['Epoch_no'], df1['Train_Loss'], 'o',
	                 label=f"Data (epochs <= {epoch_steps[0][1]})")
	        plt.plot(df1['Epoch_no'], optimus_predict(df1['Epoch_no'], b0, b1, b2), 'r-', label='Fitted Curve')
	        plt.plot(df2['Epoch_no'], df2['Train_Loss'], 'o',
	                 label=f"Data (epochs {epoch_steps[1][0]}-{epoch_steps[1][1]})")
	        plt.plot(df2['Epoch_no'], optimus_predict(df2['Epoch_no'], b0_2, b1_2, b2_2), 'r-')
	        plt.plot(df3['Epoch_no'], df3['Train_Loss'], 'o',
	                 label=f"Data (epochs >= {epoch_steps[2][0]})")
	        plt.plot(df3['Epoch_no'], optimus_predict(df3['Epoch_no'], b0_3, b1_3, b2_3), 'r-')
	        plt.xlabel('Epoch')
	        plt.ylabel('Training Loss')
	        plt.title(f"Training Loss vs Epoch for {model} on {gpu}")
	        plt.legend()
	        plt.show()