Last active
August 29, 2015 14:12
-
-
Save wy36101299/23a125a09a0904c24a75 to your computer and use it in GitHub Desktop.
Hierarchical algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
class point: | |
def __init__(self,vfeature,vlabel): | |
self.feature = vfeature | |
self.label = vlabel | |
class cluster: | |
def __init__(self,numbercluster): | |
self.numbercluster = numbercluster | |
self.cluster = {} | |
# 算出兩兩point的集合 | |
def setDistance(points): | |
pointsDistance=[] | |
copypoints = list(points) | |
for a in points: | |
for b in copypoints: | |
if a != b: | |
pointsDistance.append([a,b]) | |
copypoints.remove(a) | |
return pointsDistance | |
# 看每個點的分類 | |
def show(): | |
for index, item in enumerate(points): | |
t="" | |
for findex, fitem in enumerate(item.feature): | |
t += str(findex)+':'+str(fitem)+'\t' | |
print(str(index)+':'+"分類"+str(item.label)+"\tfeature-"+str(t)) | |
# 每一筆data | |
points=[] | |
#每一次分類的紀錄 | |
timeclusterlist=[] | |
# 載入data | |
filenamme = '/Users/wy/Desktop/hdata.txt' | |
with open (filenamme,'r') as f: | |
lines = f.readlines() | |
for index, line in enumerate(lines): | |
line=line.strip() | |
listLine = map(float,line.split(',')) | |
index = point(listLine,str(index)) | |
points.append(index) | |
# 分類的number | |
linenumber = len(points)-1 | |
# 運行Hierarchical的次數 | |
clusternumber = len(points) | |
# 幾個feature | |
dimension = len(listLine) | |
# 把object轉為number 再算兩兩point距離的集合 | |
def setcluster(): | |
# 第一次 要初始化 | |
if len(timeclusterlist) ==0: | |
d={} | |
clustertmp = cluster(clusternumber) | |
diccluster = clustertmp.cluster | |
for index, item in enumerate(points): | |
d[points[index].label] = d.get(points[index].label,[]) | |
d[points[index].label].append(item) | |
clustertmp.cluster = d | |
timeclusterlist.append(clustertmp) | |
ltmp=[] | |
for a in timeclusterlist[len(timeclusterlist)-1].cluster.keys(): | |
ltmp.append(a) | |
setdistance = setDistance(ltmp) | |
else: | |
ltmp=[] | |
for a in timeclusterlist[len(timeclusterlist)-1].cluster.keys(): | |
ltmp.append(a) | |
setdistance = setDistance(ltmp) | |
return setdistance | |
def Hierarchical(linenumber,clusternumber): | |
setdistance = setcluster() | |
clusterDict = timeclusterlist[len(timeclusterlist)-1].cluster | |
tp=[] | |
# 找出最短路徑的集合 | |
for point in setdistance: | |
listPointA = clusterDict[point[0]] | |
listPointB = clusterDict[point[1]] | |
listNumberA = len(listPointA) | |
listNumberB = len(listPointB) | |
# A團有兩個點以上,B只有一個點 | |
if listNumberA > 1 and listNumberB == 1: | |
listError = [] | |
for point in range(listNumberA): | |
sqError = 0 | |
for indexfea in range(dimension): | |
sqError += ( listPointA[point].feature[indexfea]-listPointB[0].feature[indexfea] )**2 | |
listerror.append(sqError) | |
tp.append(min(listError)) | |
# print('a') | |
# A只有一個點,B團有兩個點以上 | |
elif listNumberA == 1 and listNumberB > 1: | |
listError = [] | |
for point in range(listNumberB): | |
sqError = 0 | |
for indexfea in range(dimension): | |
sqError += ( listPointB[point].feature[indexfea]-listPointA[0].feature[indexfea] )**2 | |
listError.append(sqError) | |
tp.append(min(listError)) | |
# print('b') | |
# A團有兩個點以上,B團有兩個點以上 | |
elif listNumberA > 1 and listNumberB > 1: | |
listError = [] | |
for pointa in range(listNumberA): | |
for pointb in range(listNumberB): | |
sqError = 0 | |
for indexfea in range(dimension): | |
sqError += ( listPointA[pointa].feature[indexfea]-listPointB[pointb].feature[indexfea] )**2 | |
listError.append(sqError) | |
tp.append(min(listError)) | |
# print('c') | |
# A只有一個點,B只有一個點 | |
elif listNumberA == 1 and listNumberB == 1: | |
sqError = 0 | |
for indexfea in range(dimension): | |
sqError += (listPointA[0].feature[indexfea]-listPointB[0].feature[indexfea])**2 | |
tp.append(sqError) | |
# print('d') | |
else: | |
print('error') | |
indSetdistance = tp.index(min(tp)) | |
# 印出該距離最小值 | |
print(tp) | |
# 兩兩point的集合 | |
print(setdistance) | |
# 第N個最短距離之集合 | |
print(indSetdistance) | |
# 最短距離,point的聚合 | |
listPointA = clusterDict[setdistance[indSetdistance][0]] | |
listPointB = clusterDict[setdistance[indSetdistance][1]] | |
listNumberA = len(listPointA) | |
listNumberB = len(listPointB) | |
if listNumberA > 1: | |
for point in range(listNumberA): | |
listPointA[point].label = str(linenumber) | |
else: | |
listPointA[0].label = str(linenumber) | |
if listNumberB > 1: | |
for point in range(listNumberB): | |
listPointB[point].label = str(linenumber) | |
else: | |
listPointB[0].label = str(linenumber) | |
# timeclusterlist存入分類紀錄 | |
d={} | |
clustertmp = cluster(clusternumber) | |
diccluster = clustertmp.cluster | |
for index, item in enumerate(points): | |
d[points[index].label] = d.get(points[index].label,[]) | |
d[points[index].label].append(item) | |
clustertmp.cluster = d | |
timeclusterlist.append(clustertmp) | |
# 列出cluster情形 | |
show() | |
# run | |
for time in range(clusternumber-1): | |
linenumber+=1 | |
Hierarchical(linenumber,time-1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment