wy36101299 · August 29, 2015 14:15
diff --git a/collaborative-filtering.py b/collaborative-filtering.py
 import numpy as np
 # linalg:Linear algebra

 #     日式  中式  美式  泰式  韓式
 # ---------------------------
 # sam  2    0    0    4    4
 #john  5    5    5    3    3
 # tim  2    4    2    1    2
 #以下矩陣可看此 user 對 item 進行評分
 #藉由協同過濾 猜出 sam對中式和美式的評價後，推薦sam吃什麼

 def loadItemScore():
    tmp = [[4, 4, 0, 2, 2],
           [4, 0, 0, 3, 3],
           [4, 0, 0, 1, 1],
           [1, 1, 1, 2, 0],
           [2, 2, 2, 0, 0],
           [5, 5, 5, 0, 0],
           [1, 1, 1, 0, 0]]
    itemScore = np.array(tmp,dtype=np.float)
    return itemScore
 def loadItemScore2():
    tmp = [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
           [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
           [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
           [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
           [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
           [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
           [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
           [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
           [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
           [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
           [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]
    itemScore = np.array(tmp,dtype=np.float)
    return itemScore
 # SVD 矩陣奇異值分解
 # 利用SVD奇異值分解可以大幅減低矩陣的儲存量
 U,sigma,VT  = np.linalg.svd(loadItemScore2())
 # 矩陣能量為 sum(sigma**2) 需保留90%為典型作法
 sig2 = sigma**2
 print("矩陣總能量為：")
 print(sum(sig2))
 print("90%矩陣能量：")
 print(sum(sig2)*0.9)
 print("轉換為二維矩陣能量：")
 print(sum(sig2[:2]))
 print("轉換三維矩陣能量：")
 print(sum(sig2[:3]))
 # 轉換為二維能量太低，需為三維 svdMatrix則為還原後的矩陣
 Sig4 = np.mat(np.eye(3)*sigma[:3])
 svdMatrix = U[:,:3] * Sig4 * VT[:3,:]
 # 資料來源：http://en.wikipedia.org/wiki/Singular_value_decomposition

 # Euclid Similarity
 def eulidSim(a,b):
    eulid = np.linalg.norm(a-b)
    norEulid = 1.0/(1.0 + eulid)
    return norEulid
    
 # Adjusted Cosine Similarity 
 # 不超過3 Similarity ＝1
 def adjCosSim(a,b):
    if len(a)<3:
        return 1
    else:
        a = a-a.mean()
        b = b-b.mean()
        cos = float(np.inner(a,b))/(np.linalg.norm(a)*np.linalg.norm(b))
        norCos = 0.5+0.5*cos
        return norCos

 # Cosine Similarity 
 def cosSim(a,b):
    cos = float(np.inner(a,b))/(np.linalg.norm(a)*np.linalg.norm(b))
    norCos = 0.5+0.5*cos
    return norCos
    
 # Person Correlation Coefficient Similarity
 # 不超過3 Similarity ＝1
 def pearsSim(a,b):
    if len(a)<3:
        return 1
    else:
        corcoef = np.corrcoef(a,b,rowvar=0)[0][1]
        norCorcoef = 0.5+0.5*corcoef
        return norCorcoef
 # 資料來源：http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient

 # Person Correlation Coefficient Similarity 和 Adjusted Cosine Similarity 比較
 # In pearson correlation,減去該物品ㄉ
 # In adjusted cosine correlation 不同使用者可能對物品會有較高或是較低的評分表現 因此減去使用者平均的評分 e.x.(4,5)(1,2)視為行為相似
 # 資料來源：http://www.cs.carleton.edu/cs_comps/0607/recommend/recommender/itembased.html

 # 概念藉由使用者都評分物品的分數做相似度運算，其結果＊該未評分之物品 總和取平均 即為可能的分數
 def itemSimRec(itemScore, user, simMethods, item):
    # user number : shape(itemScore)[0] item number : shape(itemScore)[1]
    n = np.shape(itemScore)[1]
    simTotal = 0.0; ratSimTotal = 0.0
    for i in range(n):
        userItemRating = itemScore[user,i]
        # pass norating item
        if userItemRating == 0 or i==item:
            pass
        else:
            # find same rating item
            sameRatingItem = np.nonzero(np.logical_and(itemScore[:,item] > 0,itemScore[:,i] > 0))[0]
            if len(sameRatingItem) == 0: 
                similarity = 0
            else: 
                similarity = simMethods(itemScore[sameRatingItem,item],itemScore[sameRatingItem,i])
 #                 print(itemScore[sameRatingItem,item])
 #                 print(itemScore[sameRatingItem,i])
 #             print 'the %d and %d similarity is: %f' % (item, i, similarity)
            simTotal += similarity
            ratSimTotal += similarity * userItemRating
    if simTotal == 0: 
        return 0
    else: 
        return ratSimTotal/simTotal
 
 def recommend(itemScore, user, N=3, simMethods=cosSim, estMethod=itemSimRec):
    #find unrated items 
    unratedItems = np.nonzero(itemScore[user,:] ==0)[0]
    if len(unratedItems) == 0: 
        return 'you rated everything'
    else:
        itemScores = []
        for item in unratedItems:
            estimatedScore = estMethod(itemScore, user, simMethods, item)
            itemScores.append((item, estimatedScore))
            recItem = sorted(itemScores, key=lambda x: x[1], reverse=True)[:N]
        return recItem
 
 # SVD分解再還原 只有有多出SVD的部分，其餘和itemSimRec算法相同
 def svdItemSimRec(itemScore, user, simMethods, item):
    n = np.shape(itemScore)[1]
    simTotal = 0.0; ratSimTotal = 0.0
    U,Sigma,VT = np.linalg.svd(itemScore)
    Sig4 = np.mat(np.eye(3)*Sigma[:3]) #arrange Sig4 into a diagonal matrix
    xformedItems = U[:,:3] * Sig4 * VT[:3,:]  #create transformed items    
    for i in range(n):
        userItemRating = itemScore[user,i]
        # pass norating item
        if userItemRating == 0 or i==item:
            pass
        else:
            # find same rating item
            sameRatingItem = np.nonzero(np.logical_and(itemScore[:,item] > 0,itemScore[:,i] > 0))[0]
            if len(sameRatingItem) == 0: 
                similarity = 0
            else: 
                similarity = simMethods(itemScore[sameRatingItem,item],itemScore[sameRatingItem,i])
 #                 print(itemScore[sameRatingItem,item])
 #                 print(itemScore[sameRatingItem,i])
 #             print 'the %d and %d similarity is: %f' % (item, i, similarity)
            simTotal += similarity
            ratSimTotal += similarity * userItemRating
    if simTotal == 0: 
        return 0
    else: 
        return ratSimTotal/simTotal
 
 # [(4, 5.0), (9, 5.0), (10, 4.7297297297297298)]
 # 物品4 分數5 物品9 分數5 物品10 分數4.7 依序推薦
 print("estMethod=itemSimRec")
 print("--------------------")
 print("simMethods=eulidSim")
 print(recommend(loadItemScore2(),4,simMethods=eulidSim,estMethod=itemSimRec))
 print("simMethods=cosSim")
 print(recommend(loadItemScore2(),4,simMethods=cosSim,estMethod=itemSimRec))
 print("simMethods=adjCosSim")
 print(recommend(loadItemScore2(),4,simMethods=adjCosSim,estMethod=itemSimRec))
 print("simMethods=pearsSim")
 print(recommend(loadItemScore2(),4,simMethods=pearsSim,estMethod=itemSimRec))

 # 照理說經過SVD分解後，可大幅減少儲存量，但對於矩陣還原可能會失真，故會比itemSimRec中有更多誤差存在
 print("estMethod=svdItemSimRec")
 print("-----------------------")
 print("simMethods=eulidSim")
 print(recommend(loadItemScore2(),4,simMethods=eulidSim,estMethod=svdItemSimRec))
 print("simMethods=cosSim")
 print(recommend(loadItemScore2(),4,simMethods=cosSim,estMethod=svdItemSimRec))
 print("simMethods=adjCosSim")
 print(recommend(loadItemScore2(),4,simMethods=adjCosSim,estMethod=svdItemSimRec))
 print("simMethods=pearsSim")
 print(recommend(loadItemScore2(),4,simMethods=pearsSim,estMethod=svdItemSimRec))
	import numpy as np
	# linalg:Linear algebra

	# 日式中式美式泰式韓式
	# ---------------------------
	# sam 2 0 0 4 4
	#john 5 5 5 3 3
	# tim 2 4 2 1 2
	#以下矩陣可看此 user 對 item 進行評分
	#藉由協同過濾猜出 sam對中式和美式的評價後，推薦sam吃什麼

	def loadItemScore():
	tmp = [[4, 4, 0, 2, 2],
	[4, 0, 0, 3, 3],
	[4, 0, 0, 1, 1],
	[1, 1, 1, 2, 0],
	[2, 2, 2, 0, 0],
	[5, 5, 5, 0, 0],
	[1, 1, 1, 0, 0]]
	itemScore = np.array(tmp,dtype=np.float)
	return itemScore
	def loadItemScore2():
	tmp = [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
	[0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
	[0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
	[3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
	[5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
	[0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
	[4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
	[0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
	[0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
	[0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
	[1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]
	itemScore = np.array(tmp,dtype=np.float)
	return itemScore
	# SVD 矩陣奇異值分解
	# 利用SVD奇異值分解可以大幅減低矩陣的儲存量
	U,sigma,VT = np.linalg.svd(loadItemScore2())
	# 矩陣能量為 sum(sigma**2) 需保留90%為典型作法
	sig2 = sigma**2
	print("矩陣總能量為：")
	print(sum(sig2))
	print("90%矩陣能量：")
	print(sum(sig2)*0.9)
	print("轉換為二維矩陣能量：")
	print(sum(sig2[:2]))
	print("轉換三維矩陣能量：")
	print(sum(sig2[:3]))
	# 轉換為二維能量太低，需為三維 svdMatrix則為還原後的矩陣
	Sig4 = np.mat(np.eye(3)*sigma[:3])
	svdMatrix = U[:,:3] * Sig4 * VT[:3,:]
	# 資料來源：http://en.wikipedia.org/wiki/Singular_value_decomposition

	# Euclid Similarity
	def eulidSim(a,b):
	eulid = np.linalg.norm(a-b)
	norEulid = 1.0/(1.0 + eulid)
	return norEulid

	# Adjusted Cosine Similarity
	# 不超過3 Similarity ＝1
	def adjCosSim(a,b):
	if len(a)<3:
	return 1
	else:
	a = a-a.mean()
	b = b-b.mean()
	cos = float(np.inner(a,b))/(np.linalg.norm(a)*np.linalg.norm(b))
	norCos = 0.5+0.5*cos
	return norCos

	# Cosine Similarity
	def cosSim(a,b):
	cos = float(np.inner(a,b))/(np.linalg.norm(a)*np.linalg.norm(b))
	norCos = 0.5+0.5*cos
	return norCos

	# Person Correlation Coefficient Similarity
	# 不超過3 Similarity ＝1
	def pearsSim(a,b):
	if len(a)<3:
	return 1
	else:
	corcoef = np.corrcoef(a,b,rowvar=0)[0][1]
	norCorcoef = 0.5+0.5*corcoef
	return norCorcoef
	# 資料來源：http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient

	# Person Correlation Coefficient Similarity 和 Adjusted Cosine Similarity 比較
	# In pearson correlation,減去該物品ㄉ
	# In adjusted cosine correlation 不同使用者可能對物品會有較高或是較低的評分表現因此減去使用者平均的評分 e.x.(4,5)(1,2)視為行為相似
	# 資料來源：http://www.cs.carleton.edu/cs_comps/0607/recommend/recommender/itembased.html

	# 概念藉由使用者都評分物品的分數做相似度運算，其結果＊該未評分之物品總和取平均即為可能的分數
	def itemSimRec(itemScore, user, simMethods, item):
	# user number : shape(itemScore)[0] item number : shape(itemScore)[1]
	n = np.shape(itemScore)[1]
	simTotal = 0.0; ratSimTotal = 0.0
	for i in range(n):
	userItemRating = itemScore[user,i]
	# pass norating item
	if userItemRating == 0 or i==item:
	pass
	else:
	# find same rating item
	sameRatingItem = np.nonzero(np.logical_and(itemScore[:,item] > 0,itemScore[:,i] > 0))[0]
	if len(sameRatingItem) == 0:
	similarity = 0
	else:
	similarity = simMethods(itemScore[sameRatingItem,item],itemScore[sameRatingItem,i])
	# print(itemScore[sameRatingItem,item])
	# print(itemScore[sameRatingItem,i])
	# print 'the %d and %d similarity is: %f' % (item, i, similarity)
	simTotal += similarity
	ratSimTotal += similarity * userItemRating
	if simTotal == 0:
	return 0
	else:
	return ratSimTotal/simTotal

	def recommend(itemScore, user, N=3, simMethods=cosSim, estMethod=itemSimRec):
	#find unrated items
	unratedItems = np.nonzero(itemScore[user,:] ==0)[0]
	if len(unratedItems) == 0:
	return 'you rated everything'
	else:
	itemScores = []
	for item in unratedItems:
	estimatedScore = estMethod(itemScore, user, simMethods, item)
	itemScores.append((item, estimatedScore))
	recItem = sorted(itemScores, key=lambda x: x[1], reverse=True)[:N]
	return recItem

	# SVD分解再還原只有有多出SVD的部分，其餘和itemSimRec算法相同
	def svdItemSimRec(itemScore, user, simMethods, item):
	n = np.shape(itemScore)[1]
	simTotal = 0.0; ratSimTotal = 0.0
	U,Sigma,VT = np.linalg.svd(itemScore)
	Sig4 = np.mat(np.eye(3)*Sigma[:3]) #arrange Sig4 into a diagonal matrix
	xformedItems = U[:,:3] * Sig4 * VT[:3,:] #create transformed items
	for i in range(n):
	userItemRating = itemScore[user,i]
	# pass norating item
	if userItemRating == 0 or i==item:
	pass
	else:
	# find same rating item
	sameRatingItem = np.nonzero(np.logical_and(itemScore[:,item] > 0,itemScore[:,i] > 0))[0]
	if len(sameRatingItem) == 0:
	similarity = 0
	else:
	similarity = simMethods(itemScore[sameRatingItem,item],itemScore[sameRatingItem,i])
	# print(itemScore[sameRatingItem,item])
	# print(itemScore[sameRatingItem,i])
	# print 'the %d and %d similarity is: %f' % (item, i, similarity)
	simTotal += similarity
	ratSimTotal += similarity * userItemRating
	if simTotal == 0:
	return 0
	else:
	return ratSimTotal/simTotal

	# [(4, 5.0), (9, 5.0), (10, 4.7297297297297298)]
	# 物品4 分數5 物品9 分數5 物品10 分數4.7 依序推薦
	print("estMethod=itemSimRec")
	print("--------------------")
	print("simMethods=eulidSim")
	print(recommend(loadItemScore2(),4,simMethods=eulidSim,estMethod=itemSimRec))
	print("simMethods=cosSim")
	print(recommend(loadItemScore2(),4,simMethods=cosSim,estMethod=itemSimRec))
	print("simMethods=adjCosSim")
	print(recommend(loadItemScore2(),4,simMethods=adjCosSim,estMethod=itemSimRec))
	print("simMethods=pearsSim")
	print(recommend(loadItemScore2(),4,simMethods=pearsSim,estMethod=itemSimRec))

	# 照理說經過SVD分解後，可大幅減少儲存量，但對於矩陣還原可能會失真，故會比itemSimRec中有更多誤差存在
	print("estMethod=svdItemSimRec")
	print("-----------------------")
	print("simMethods=eulidSim")
	print(recommend(loadItemScore2(),4,simMethods=eulidSim,estMethod=svdItemSimRec))
	print("simMethods=cosSim")
	print(recommend(loadItemScore2(),4,simMethods=cosSim,estMethod=svdItemSimRec))
	print("simMethods=adjCosSim")
	print(recommend(loadItemScore2(),4,simMethods=adjCosSim,estMethod=svdItemSimRec))
	print("simMethods=pearsSim")
	print(recommend(loadItemScore2(),4,simMethods=pearsSim,estMethod=svdItemSimRec))