Skip to content

Instantly share code, notes, and snippets.

@yannisxu
Created February 25, 2017 05:25
Show Gist options
  • Save yannisxu/3cb1a7aec466182355c770b5cec15712 to your computer and use it in GitHub Desktop.
Save yannisxu/3cb1a7aec466182355c770b5cec15712 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# @Author: xuyannis
# @Date: 2017-02-25 12:35:15
# @Last Modified by: xuyannis
# @Last Modified time: 2017-02-25 13:22:54
import numpy as np
from numpy import linalg as la
import pandas as pd
def cosineSimilarity(vec1, vec2):
    """Return the cosine similarity of two vectors, rescaled into [0, 1].

    The raw cosine lies in [-1, 1]; it is mapped to ``0.5 + 0.5 * cos`` so
    that opposite vectors give 0.0, orthogonal vectors 0.5 and parallel
    vectors 1.0.

    Args:
        vec1: First vector (any array-like of numbers; flattened to 1-D).
        vec2: Second vector, with the same number of elements as ``vec1``.

    Returns:
        The rescaled cosine similarity as a float, or NaN when either
        vector has zero norm (the cosine is undefined in that case).

    Raises:
        ValueError: If the two vectors have different numbers of elements.
    """
    # Plain 1-D arrays instead of np.mat: the matrix alias was removed in
    # NumPy 2.0, and float() on a 1x1 matrix is deprecated.
    a = np.asarray(vec1, dtype=float).ravel()
    b = np.asarray(vec2, dtype=float).ravel()
    num = float(np.dot(a, b))
    denom = float(la.norm(a) * la.norm(b))
    if denom == 0.0:
        # Undefined for a zero vector; return NaN explicitly rather than
        # relying on a divide-by-zero warning.
        return float("nan")
    return 0.5 + 0.5 * (num / denom)  # normalise from [-1, 1] to [0, 1]
def pearsonSimilar(inA, inB):
    """Return the Pearson correlation of two sequences, rescaled into [0, 1].

    The correlation coefficient r in [-1, 1] is mapped to ``0.5 + 0.5 * r``.

    Args:
        inA: First sequence of observations.
        inB: Second sequence of observations, the same length as ``inA``.

    Returns:
        1.0 when ``inA`` has fewer than three elements (the correlation is
        not computed for such short inputs); otherwise ``0.5 + 0.5 * r``.
    """
    if len(inA) < 3:
        return 1.0
    # corrcoef returns the 2x2 correlation matrix; the off-diagonal entry
    # is the coefficient between inA and inB.
    correlation = np.corrcoef(inA, inB, rowvar=False)[0][1]
    return 0.5 + 0.5 * correlation
# Load the table from 30.csv and pull out the four columns being compared.
df = pd.read_csv('30.csv')
vec_pv = df.pv
vec_pv_real = df.pv_real
vec_cost = df.cost
vec_pv_ad = df.pv_ad

# Print the rescaled Pearson similarity for each pair of columns.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("PV与无爬虫PV相似度")  # PV vs. crawler-free PV
print(pearsonSimilar(vec_pv, vec_pv_real))
print("PV与消耗相似度")  # PV vs. cost
print(pearsonSimilar(vec_pv, vec_cost))
print("无爬虫PV与消耗相似度")  # crawler-free PV vs. cost
print(pearsonSimilar(vec_pv_real, vec_cost))
print("PV与广告PV相似度")  # PV vs. ad PV
print(pearsonSimilar(vec_pv, vec_pv_ad))
print("无爬虫PV与广告PV相似度")  # crawler-free PV vs. ad PV
print(pearsonSimilar(vec_pv_real, vec_pv_ad))
print("广告PV与广告消耗相似度")  # ad PV vs. ad cost
# Uses vec_pv_ad to match the label; the original passed vec_pv, which
# repeated the "PV vs. cost" figure printed earlier.
print(pearsonSimilar(vec_pv_ad, vec_cost))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment