Last active
November 7, 2017 13:56
-
-
Save hoenirvili/081383c0b18bc9c652b82f42130aa683 to your computer and use it in GitHub Desktop.
id3 algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
usoara | mirositoare | arepete | neteda | comestibila | |
---|---|---|---|---|---|
1 | 0 | 0 | 0 | 1 | |
1 | 0 | 1 | 0 | 1 | |
0 | 1 | 0 | 1 | 1 | |
0 | 0 | 0 | 1 | 0 | |
1 | 1 | 1 | 0 | 0 | |
1 | 0 | 1 | 1 | 0 | |
1 | 0 | 0 | 1 | 0 | |
0 | 1 | 0 | 0 | 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import pandas as pd | |
import sklearn | |
from sklearn.datasets import load_iris | |
from sklearn import tree | |
import numpy as np | |
import graphviz | |
def retrieve_target_names(dataset):
    """
    Return (target, target_names) for the mushroom dataset.

    target is the 'comestibila' column in the ORIGINAL row order, so it
    stays aligned with the feature rows passed to the classifier;
    target_names maps class 0 -> 'necomestibila', 1 -> 'comestibila'.

    BUG FIX: the original called .sort_values() on the target column,
    which reorders the labels independently of the feature matrix and
    silently corrupts the training data fed to classifier.fit().
    """
    target = dataset['comestibila'].values
    return (target, np.array(['necomestibila', 'comestibila']))
def main():
    """Train a decision tree on data.csv and render it with graphviz."""
    # the last column is the decision; the first four are features
    frame = pd.read_csv('data.csv')
    feature_names = frame.columns.tolist()[:4]
    target, target_names = retrieve_target_names(frame)
    samples = frame[feature_names].values
    # 'entropy' == information-gain splitting, i.e. the id3 criterion
    classifier = tree.DecisionTreeClassifier(criterion='entropy')
    classifier.fit(samples, target)
    dot = tree.export_graphviz(classifier, out_file=None,
                               feature_names=feature_names,
                               class_names=target_names,
                               filled=True, rounded=True,
                               special_characters=True)
    graphviz.Source(dot).render("mushrooms")

if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import csv | |
import sys | |
import math | |
import copy | |
def entropy(partition):
    """
    Shannon entropy of a partition of class counts.

    partition : list of per-class counts, e.g. [2, 5]
    """
    total = sum(partition)
    # zero counts contribute nothing (lim p->0 of p*log2(p) == 0)
    return -sum((count / total) * math.log2(count / total)
                for count in partition if count != 0)


def ig(partitions):
    """
    Information gain of splitting the instances into the given
    partitions.

    partitions: list of per-vertice class counts, e.g. [[1, 2], [2, 5]]
    """
    # BUG FIX: the original rebuilt the root counts with
    # root[column] += partitions[row][column], indexing rows by the
    # class position — correct only when the number of partitions
    # happens to equal the number of classes. Summing each class
    # column across all partitions is always correct.
    root = [sum(class_counts) for class_counts in zip(*partitions)]
    root_entropy = entropy(root)
    # total number of instances reaching this decision stump
    instances = sum(root)
    avg_entropy = sum((sum(p) / instances) * entropy(p) for p in partitions)
    return root_entropy - avg_entropy
def dmap(attributes, data):
    """
    Map every attribute name to its column of values.

    attributes: ['usoara', 'mirositoare', 'arepete', 'neteda', 'comestibila']
    data: row-major instances, e.g. [['1', '0', '0', '0', '1'], ...]
    returns: {'usoara': ['1', ...], ..., 'comestibila': ['1', ...]}
    """
    column_count = len(data[0])
    columns = [[row[j] for row in data] for j in range(column_count)]
    return dict(zip(attributes, columns))
def decision_stamps(dmap):
    """
    Build the decision stump counts for every non-decision attribute.

    The LAST key of dmap is taken as the decision attribute. For every
    other attribute, and for each of its distinct values (vertices),
    count how many instances fall into each decision class:

        {'usoara': {'0': {'0': 2, '1': 1},
                    '1': {'0': 3, '1': 2}}, ...}
    """
    decision_key = [*dmap][-1]
    decisions = dmap[decision_key]
    unique_decisions = list(set(decisions))
    ds = {}
    for attribute, column in dmap.items():
        if attribute == decision_key:
            continue
        stamp = {}
        for vertice in list(set(column)):
            # decisions of the instances that carry this vertice value
            matching = [decisions[i] for i, v in enumerate(column) if v == vertice]
            stamp[vertice] = {d: matching.count(d) for d in unique_decisions}
        ds[attribute] = stamp
    return ds
def all_partitions(decision_stamps):
    """
    Strip the labels off the decision stumps, keeping only the raw
    class-count partitions per attribute:

        {'usoara': {'0': {'0': 2, '1': 1}, '1': {'0': 3, '1': 2}}, ...}
        -> {'usoara': [[2, 1], [3, 2]], ...}
    """
    return {
        attribute: [[*counts.values()] for counts in stamps.values()]
        for attribute, stamps in decision_stamps.items()
    }
def best_attribute(partitions):
    """
    Return the attribute whose partitions yield the maximum
    information gain.

    partitions: {'usoara': [[2, 3], [1, 2]], ..., 'neteda': [[1, 3], [2, 2]]}
    returns: the winning attribute name (ties keep the first one seen)

    BUG FIX: the original returned the FIRST attribute unconditionally
    whenever exactly two candidates were left, never comparing their
    gains; it also returned '' when every gain was 0 because the
    maximum was initialised to 0 with a strict '>' comparison. The
    gain is now always computed for every candidate.
    """
    best_name = None
    best_gain = None
    for name, parts in partitions.items():
        gain = ig(parts)
        if best_gain is None or gain > best_gain:
            best_gain = gain
            best_name = name
    return best_name
def filter_data(data, attributes, attribute, attribute_value):
    """
    Return a deep copy of the rows of *data* whose value for
    *attribute* equals *attribute_value*.

    The deep copy keeps the returned subset independent of the
    caller's data. (The original carried two dead locals,
    `subset = None` and `s = subset`, and a manual append loop;
    both replaced by a single comprehension.)
    """
    index = attributes.index(attribute)
    return [row for row in copy.deepcopy(data) if row[index] == attribute_value]
def remove_column(data, col):
    """
    Return a deep copy of *data* with column *col* removed from every
    row; the caller's rows are left untouched.
    """
    copied = copy.deepcopy(data)
    for row in copied:
        del row[col]
    return copied
def pick_best_attribute(attributes, data):
    """
    Select the attribute with the maximum information gain.

    Returns (attribute, decision_stamps) where decision_stamps is the
    full stump mapping so the caller can index it by the attribute.

    BUG FIX: in the two-attribute shortcut the original passed the
    attribute NAME (a string) to dmap(), so dict(zip(...)) iterated the
    name's characters and produced stamps keyed by single letters,
    making the caller's ds[attribute] lookup raise KeyError. The full
    attribute list is passed now.
    """
    # with two attributes only one candidate is left beside the
    # decision column, so it is the best by definition
    if len(attributes) == 2:
        ds = decision_stamps(dmap(attributes, data))
        return (attributes[0], ds)
    # map attribute names to columns, build the stumps, keep only the
    # raw partitions and pick the attribute with the highest gain
    dmapp = dmap(attributes, data)
    ds = decision_stamps(dmapp)
    parts = all_partitions(ds)
    attribute = best_attribute(parts)
    return (attribute, ds)
def pick_subset(data, attributes, attribute, vertice):
    """
    Build the (data, attributes) pair for the branch of *attribute*
    labelled *vertice*: keep only the instances carrying that vertice
    value and drop the attribute itself (both its column in the data
    and its name in the attribute list).
    """
    # instances that carry the vertice value for this attribute
    subset = filter_data(data, attributes, attribute, vertice)
    # work on a copy so the caller's attribute list survives
    remaining = copy.copy(attributes)
    column = remaining.index(attribute)
    remaining.remove(attribute)
    # the attribute is fixed along this branch; its column is noise now
    subset = remove_column(subset, column)
    return (subset, remaining)
class Node:
    """
    A single decision node of the Id3 tree.

    Holds the attribute name, the attribute's decision stamp
    (per-vertice class counts) and, per vertice, either a decision
    label (str), a child Node, or None while still unclassified.
    """

    def __init__(self, attribute=None, stamp=None):
        # Node() with no arguments is an empty placeholder
        if stamp is None and attribute is None:
            self.attribute = None
            self.stamp = None
            self.neighbours = None
            return
        self.attribute = attribute
        self.stamp = stamp
        self.neighbours = {}
        # for every vertice try to reach a decision right away; store
        # None when the partition does not classify the instances
        # perfectly, so the vertice can later receive a child node
        for vertice, counts in self.stamp.items():
            self.neighbours[vertice] = self._decision(counts)

    def __repr__(self):
        return self.__str__()

    @property
    def vertices(self):
        """All vertice (attribute value) labels of this node."""
        return [*self.neighbours]

    def __str__(self):
        message = ''
        message += '[NB attribute = {}, '.format(self.attribute)
        message += 'stamp = {}, '.format(self.stamp)
        for key, value in self.neighbours.items():
            message += 'vertice:{} => decision|node {} '.format(key, value)
        message += 'NE]'
        return message

    def _decision(self, s):
        """
        Return the decision label when exactly one class has a
        positive count in *s*, else None (the vertice still needs a
        child node to classify its instances).
        """
        aparitions = 0
        dec = None
        values = [*s.values()]
        for v in values:
            if v > 0:
                dec = v
                aparitions += 1
        if aparitions > 1:
            # more than one class present: no perfect classification
            return None
        keys = [*s.keys()]
        return keys[values.index(dec)]

    def empty(self):
        """True for the placeholder produced by Node()."""
        return (self.stamp is None and
                self.attribute is None and
                self.neighbours is None)

    def push(self, node):
        """
        Adopt *node* wholesale when this node is empty, otherwise
        attach it to the first still-unclassified vertice.

        BUG FIX: the original iterated self.neighbours.values() and
        rebound the LOOP VARIABLE (`neighbour = node`), which never
        modified the dict — the push was a silent no-op. We now assign
        through the dict. (Attaching to the first open vertice matches
        the caller's singular "the current not classified neighbour"
        intent — TODO confirm against callers.)
        """
        if self.empty():
            self.stamp = node.stamp
            self.attribute = node.attribute
            self.neighbours = node.neighbours
            return
        for vertice, neighbour in self.neighbours.items():
            if neighbour is None:
                self.neighbours[vertice] = node
                return

    def push_neighbours(self, vertice, node):
        """Attach *node* under *vertice*; error if already decided."""
        if self.empty():
            self.stamp = node.stamp
            self.attribute = node.attribute
            self.neighbours = node.neighbours
            return
        if self.neighbours[vertice] is not None:
            raise ValueError(
                'An already decision was made for vertice {} decision {}'.
                format(vertice, self.neighbours[vertice])
            )
        self.neighbours[vertice] = node

    def neighbour(self, vertice):
        """The decision label / child node stored under *vertice*."""
        return self.neighbours[vertice]

    def node_enighbours_are_classified(self):
        """
        True when every vertice already holds a decision or a child
        node; False while any vertice is still None (or the node is
        empty). (Name typo kept for caller compatibility.)
        """
        if self.empty():
            return False
        for v in self.neighbours.values():
            if v is None:
                return False
        return True
class Tree(object):
    """
    A general purpose tree holding Id3 nodes.

    Tracks `root` (the first pushed node) and `current` (the most
    recently pushed node that still has unclassified vertices).
    """
    # starting node
    root = None
    # maintain the current node
    current = None

    def empty(self):
        """True while no node has been pushed yet."""
        return self.root is None and self.current is None

    def classify(self, attributes, instance):
        """
        Walk from the root, following the instance's value for each
        node's attribute, until a decision label is reached. Returns
        the label, or None if the walk falls off the tree.
        """
        node = self.root
        decision = None
        while node is not None:
            idx = attributes.index(node.attribute)
            vertice = instance[idx]
            value = node.neighbour(vertice)
            # anything that is not a Node is a decision label: take it
            if not isinstance(value, Node):
                decision = value
                break
            # a child node: keep descending
            node = value
        return decision

    def push(self, new_node):
        """
        Push *new_node*, maintaining root and current: the first node
        becomes both; later nodes are delegated to the current node.
        """
        if self.empty():
            self.root = new_node
            self.current = new_node
            return
        self.current.push(new_node)

    def push_neighbours(self, vertice, new_node):
        """
        Attach *new_node* under *vertice* of the current node and
        descend into it while it still has open vertices.

        BUG FIX: the original guarded the descent with
        `if isinstance(value):` — `value` was undefined and isinstance
        needs two arguments, so the line raised at runtime. The bogus
        guard is removed.
        """
        if self.empty():
            raise ValueError("Can't push neighbours to a empty tree")
        self.current.push_neighbours(vertice, new_node)
        if not new_node.node_enighbours_are_classified():
            self.current = new_node
def id3(data, attributes):
    """
    Recursively build an Id3 decision tree over *data* (row-major
    instances whose LAST column is the decision) and return a Tree.

    BUG FIXES vs the original:
    - it returned `tree.root` (a Node) on the base case but `tree`
      otherwise, so callers crashed or mis-handled the result;
    - the recursion pushed the returned Tree OBJECT as a neighbour,
      which classify() would later mistake for a decision label —
      we now push the subtree's root Node;
    - it looped over ALL vertices, including already-decided ones,
      which made push_neighbours raise ValueError.
    """
    tree = Tree()
    attribute, ds = pick_best_attribute(attributes, data)
    node = Node(attribute, ds[attribute])
    tree.push(node)
    # every vertice already classified: nothing left to expand
    if node.node_enighbours_are_classified():
        return tree
    for vertice in node.vertices:
        # skip vertices whose decision was already reached
        if node.neighbour(vertice) is not None:
            continue
        # NOTE(review): an empty subset here would crash dmap();
        # presumably the training data never produces one — confirm.
        subset = pick_subset(data, attributes, attribute, vertice)
        subtree = id3(*subset)
        tree.push_neighbours(vertice, subtree.root)
    return tree
def make_decisions(test_data, attributes, tree):
    """Classify every row of *test_data* with *tree* and collect the labels."""
    return [tree.classify(attributes, row) for row in test_data]
def main():
    """Train an Id3 tree from the csv file given on the command line
    and print the tree plus the decisions for a few test instances."""
    # the csv file with the training instances is the first argument
    if len(sys.argv) < 2 or sys.argv[1] is None:
        raise ValueError("Please specify csv data file")
    name = sys.argv[1]
    with open(name, mode='r') as file:
        rows = [row for row in csv.reader(file)]
    # first row holds the attribute names, the rest are instances
    attributes, data = rows[0], rows[1:]
    tree = id3(data, attributes)
    print(tree.root)
    test_data = [['0', '1', '1', '1'],
                 ['0', '1', '0', '1'],
                 ['1', '1', '0', '0']]
    decisions = make_decisions(test_data, attributes, tree)
    print()
    for attr in attributes:
        print('{} '.format(attr), end='')
    print()
    for i, row in enumerate(test_data):
        for value in row:
            print('{} '.format(value), end='')
        print('Decision: {} '.format(decisions[i]))

if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# run the pure-python id3 implementation on the data set
all:
	./id3.py data.csv

# render the sklearn decision tree with graphviz
graph:
	./graph.py data.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment