almugabo · March 24, 2019 19:22
diff --git a/HungarianMethod.py b/HungarianMethod.py
 import pandas as pd 
 import numpy as np 
 from scipy.optimize import linear_sum_assignment


 def make_assignments(xDF):
    '''
    a simple wrapper around the 
    scipy.optimize.linear_sum_assignment 
    which implements the Hungarian Algorithm 

    # Input: 
    # a rectangular dataframe with costs (cost matrix)
    # Format: 
    # rowsxcolumns = rows = workers, cols = tasks     
    #-- OUTPUT:
    a dataframe with optimized assignments     
    
    # N.B: INDEX of the dataframe MUST be ids of the workers 
    # if more tasks than workers, we need to clone the workers 
    # see stackoverflow question : "Hungarian algorithm: multiple jobs per worker" 
    # see https://stackoverflow.com/questions/48108496
    
    # see docs on the scipy.optimize.linear_sum_assignment can deal with 
    # about input as rectangular matrix 
    # and also another inspiration http://software.clapper.org/munkres/

    TO DO : 
    1. add DISALLOWED assignements 
    N.B: Bug in the scipy when values and np.Inf or np.nan
    see stackoverflow: 
    https://stackoverflow.com/questions/42035999/
    2. ??? CONTROLING maximum number of tasks per worker and number 
    of workers per tasks ?? maybe through "cloning" (i.e. skipping)
    
    example from http://software.clapper.org/munkres/
    cost_matrix = [[5, 9, 1],
                  [10, 3, 2],
                  [8, 7, 4]]
    df = pd.DataFrame(cost_matrix, 
                      columns=['task_1', 'task_2', 'task_3'],
                      index = ['worker_1', 'worker_2', 'worker_3'])


    make_assignments(df)
    
    
    '''

    # How many clone are needed 
    # needed if more tasks than workeds 
    xclone_needed = np.int(np.ceil(xDF.shape[1] / xDF.shape[0])) - 1


    xDF_clones = pd.DataFrame(xDF.values, 
                              columns = xDF.columns,
                              index = [x + '_clone_1' for x in xDF.index])

    # make the clones  
    for xclone_nr in range(2, 2+xclone_needed):
        xClone = pd.DataFrame(xDF.values, 
                              columns = xDF.columns,
                              index = [x + '_clone_'+ str(xclone_nr) for x in xDF.index])
        xDF_clones = xDF_clones.append(xClone)

    # make the optimization of the assignemenrs 
    workers_idx, tasks_idx = linear_sum_assignment(xDF_clones.values)
    # get values by indices 
    x_tasks = np.take(a = xDF_clones.columns,
                      indices = tasks_idx)
    x_workers_1 = np.take(a = xDF_clones.index,
                        indices = workers_idx)
    # for workers we need to get the aggregate the tasks of the 
    # clones 
    x_workers = [x.split('_clone')[0] for x in x_workers_1]


    xDF_Res = pd.DataFrame(list(zip(x_workers, x_tasks)),
                           columns=['worker', 'task'])
    
    return xDF_Res
	import pandas as pd
	import numpy as np
	from scipy.optimize import linear_sum_assignment


	def make_assignments(xDF):
	'''
	a simple wrapper around the
	scipy.optimize.linear_sum_assignment
	which implements the Hungarian Algorithm

	# Input:
	# a rectangular dataframe with costs (cost matrix)
	# Format:
	# rowsxcolumns = rows = workers, cols = tasks
	#-- OUTPUT:
	a dataframe with optimized assignments

	# N.B: INDEX of the dataframe MUST be ids of the workers
	# if more tasks than workers, we need to clone the workers
	# see stackoverflow question : "Hungarian algorithm: multiple jobs per worker"
	# see https://stackoverflow.com/questions/48108496

	# see docs on the scipy.optimize.linear_sum_assignment can deal with
	# about input as rectangular matrix
	# and also another inspiration http://software.clapper.org/munkres/

	TO DO :
	1. add DISALLOWED assignements
	N.B: Bug in the scipy when values and np.Inf or np.nan
	see stackoverflow:
	https://stackoverflow.com/questions/42035999/
	2. ??? CONTROLING maximum number of tasks per worker and number
	of workers per tasks ?? maybe through "cloning" (i.e. skipping)

	example from http://software.clapper.org/munkres/
	cost_matrix = [[5, 9, 1],
	[10, 3, 2],
	[8, 7, 4]]
	df = pd.DataFrame(cost_matrix,
	columns=['task_1', 'task_2', 'task_3'],
	index = ['worker_1', 'worker_2', 'worker_3'])


	make_assignments(df)


	'''

	# How many clone are needed
	# needed if more tasks than workeds
	xclone_needed = np.int(np.ceil(xDF.shape[1] / xDF.shape[0])) - 1


	xDF_clones = pd.DataFrame(xDF.values,
	columns = xDF.columns,
	index = [x + '_clone_1' for x in xDF.index])

	# make the clones
	for xclone_nr in range(2, 2+xclone_needed):
	xClone = pd.DataFrame(xDF.values,
	columns = xDF.columns,
	index = [x + '_clone_'+ str(xclone_nr) for x in xDF.index])
	xDF_clones = xDF_clones.append(xClone)

	# make the optimization of the assignemenrs
	workers_idx, tasks_idx = linear_sum_assignment(xDF_clones.values)
	# get values by indices
	x_tasks = np.take(a = xDF_clones.columns,
	indices = tasks_idx)
	x_workers_1 = np.take(a = xDF_clones.index,
	indices = workers_idx)
	# for workers we need to get the aggregate the tasks of the
	# clones
	x_workers = [x.split('_clone')[0] for x in x_workers_1]


	xDF_Res = pd.DataFrame(list(zip(x_workers, x_tasks)),
	columns=['worker', 'task'])

	return xDF_Res