Created
November 13, 2010 13:56
-
-
Save johnconroy/675347 to your computer and use it in GitHub Desktop.
Apply PageRank link authority algorithm to a list of Twitter users, as a first-pass measure for user authority
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pagerank calculation for a list of Twitter users | |
#implementing this was a bit of a head-scratcher... | |
f1="C:\\somedir\\list_of_edges_between_usrs.txt" #our social graph, as an edge-list | |
file1=open(f1,'r') | |
edges=file1.readlines() | |
l_nodes,l_outd,nodes2=[],[],[] | |
l_score, l_score_new, l_out_score=[],[],[] | |
#find all unique nodes | |
print 'getting unique nodes and outdegree of each...' | |
for x in range(len(edges)): | |
tab=edges[x].find('\t') | |
node=edges[x][:tab] | |
node2=edges[x][tab+1:-1] | |
nodes2.append(node2) | |
if node in l_nodes: | |
index=l_nodes.index(node) | |
l_outd[index]+=1 | |
else: | |
l_nodes.append(node) | |
l_outd.append(1) | |
nodes3=[] | |
for node2 in nodes2: | |
if node2 in nodes3: | |
pass | |
else: | |
nodes3.append(node2) | |
for x in range(len(nodes3)): | |
if nodes3[x] not in l_nodes: | |
l_nodes.append(nodes3[x]) | |
l_outd.append(1) | |
print 'unique nodes, outdegree done' | |
#create a parallel list of pagerank score. instantiate each score to 1 | |
print 'instantiating pagerank scores @ 1...' | |
for x in range(len(l_nodes)): | |
l_score.append(1.0) | |
for x in range(len(l_nodes)): | |
l_score_new.append(1.0) | |
print 'getting out-scores...' | |
for x in range(len(l_score)): | |
l_out_score.append(l_score[x]/l_outd[x]) | |
#get new pagerank scores | |
#iterate 49 times ... scores converge somewhere between 30-50 iterations (save every iteration to file to make sure they HAVE converged) | |
for n in range(49): | |
print 'Interation '+str(n) | |
fw="C:\\pagerank\\iter_"+str(n+1)+".txt" | |
filew=open(fw,'w') | |
#swap new list into old list, reset new list with 0s | |
for a in range(len(l_score)): | |
l_score[a]=l_score_new[a] | |
for b in range(len(l_score_new)): | |
l_score_new[b]=0.0 | |
#get new outbound scores | |
for c in range(len(l_out_score)): | |
l_out_score[c]=l_score[c]/l_outd[c] | |
#now get new scores based on new outbound scores | |
for x in range(len(edges)): | |
tab=edges[x].find('\t') | |
fllr=edges[x][:tab] | |
flld=edges[x][tab+1:-1] | |
fllr_index=l_nodes.index(fllr) | |
fllr_score=l_out_score[fllr_index] | |
flld_index=l_nodes.index(flld) | |
l_score_new[flld_index]+=fllr_score | |
#apply damping factor of d=.85 | |
d=0.85 | |
for m in range(len(l_score_new)): | |
l_score_new[m]=(1.0-d)+d*l_score_new[m] | |
for x in range(len(l_nodes)): | |
filew.write(str(l_nodes[x])+","+str(l_outd[x])+","+str(l_out_score[x])+','+str(l_score[x])+','+str(l_score_new[x])+'\n') | |
filew.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment