Created
October 13, 2012 21:00
-
-
Save weaselj/3886135 to your computer and use it in GitHub Desktop.
OPA2centralityTemplate for Coursera SNA class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Coursera SNA optional Programming Assignment 2 template | |
# load the igraph library | |
# you may have to install this module if you haven't already | |
import igraph as ig | |
import numpy as np | |
# read in the graph in GML format | |
# it is a sampled collection of pages from a strange set of seed categories: | |
# Math, Sociology, and Chemistry | |
# Change this to be your local file location | |
# if you are using Windows, replace the \ in the path with a double \, e.g. | |
# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml") | |
g = ig.read('wikipedia.gml',format='gml') | |
# obtain summary information about the graph | |
print g.summary() | |
# obtain the undirected degree distribution (the GML file itself is directed) | |
degrees = g.degree() | |
# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells | |
# us that the distribution is power-law. | |
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at | |
# which you should start fitting the distribution) | |
# make sure you have executed all the code in plfit.R before running this function | |
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/ | |
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py | |
from plfit import plfit | |
a, xmin, L = plfit(degrees) | |
print 'Power-Law fit - alpha:', a, ' x-min: ', xmin, ' log-likelihood: ', L | |
# plot the cumulative empirical distribution | |
y = map(degrees.count, range(max(degrees)+1)) | |
x = range(len(y)) | |
cumy = map(lambda i:float(sum(y[i:]))/sum(y), x) | |
import matplotlib.pyplot as plt | |
plt.loglog(x,cumy, lw=2) | |
plt.xlabel("degree k") | |
plt.xlabel("P(x) >= k") | |
# overlay the fitted distribution | |
startval = cumy[xmin] | |
xfit = range(xmin, max(x)) | |
fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit) | |
#fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit) | |
plt.loglog(xfit,fittedvals, 'r--', lw=4) | |
# calculate the in and out degrees separately | |
# use the degree() function and options for calculating directed degree | |
# see the documentation here: http://igraph.sourceforge.net/documentation.html | |
# see which nodes have the max out and indegree | |
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so: | |
#maxodnode = g.vs[od.index(max(od))] | |
# find undirected betweenness scores and then nodes with the max betweenness | |
# warning, can be slow with large graphs, you may consider cutoff= instead | |
bb = g.betweenness(directed=False, cutoff=5) | |
maxbbnode = g.vs[bb.index(max(bb))] | |
print 'Max betweenness: ', maxbbnode['label'] | |
# this high betweennes node may seem a bit surprising | |
# you can check out its neighbors like this. | |
print 'Neighbors of max betweenness: ', g.vs[g.neighborhood(maxbbnode)]['label'] | |
# calculate Page Rank and find the node having the highest pagerank | |
# you'll want the $vector portion of the answer returned | |
# the assignment doesn't ask about this, but it's good to know how to do this... | |
pr = g.pagerank() | |
maxprnode = g.vs[pr.index(max(pr))] | |
print 'Max Page Rank: ', maxprnode | |
# calculate the Bonacich alpha-centrality of a lattice | |
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense | |
# to me for these values of alpha | |
glfour = ig.Graph.Lattice([4,4]) | |
#unfortunately igraph python doesn't implement alpha centrality either | |
#ac = g.alpha_centrality(alpha=-0.5) | |
ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac) | |
ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size) | |
#ac = g.alpha_centrality(alpha=+0.25) | |
ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac) | |
ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Coursera SNA optional Programming Assignment 2 template | |
# load the igraph library | |
# you may have to install this module if you haven't already | |
import networkx as nx | |
import numpy as np | |
# read in the graph in GML format | |
# it is a sampled collection of pages from a strange set of seed categories: | |
# Math, Sociology, and Chemistry | |
# Change this to be your local file location | |
# if you are using Windows, replace the \ in the path with a double \, e.g. | |
# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml") | |
g = nx.read_gml('wikipedia.gml',relabel=True) | |
# obtain summary information about the graph | |
print nx.info(g) | |
# obtain the undirected degree distribution (the GML file itself is directed) | |
degrees = g.degree().values() | |
# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells | |
# us that the distribution is power-law. | |
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at | |
# which you should start fitting the distribution) | |
# make sure you have executed all the code in plfit.R before running this function | |
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/ | |
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py | |
from plfit import plfit | |
a, xmin, L = plfit(degrees) | |
print 'Power-Law fit - alpha:', a, ' x-min: ', xmin, ' log-likelihood: ', L | |
# plot the cumulative empirical distribution | |
y = nx.degree_histogram(g) | |
x = range(len(y)) | |
cumy = map(lambda i:float(sum(y[i:]))/sum(y), x) | |
import matplotlib.pyplot as plt | |
plt.loglog(x,cumy, lw=2) | |
plt.xlabel("degree k") | |
plt.xlabel("P(x) >= k") | |
# overlay the fitted distribution | |
startval = cumy[xmin] | |
xfit = range(xmin, max(x)) | |
fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit) | |
#fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit) | |
plt.loglog(xfit,fittedvals, 'r--', lw=4) | |
# calculate the in and out degrees separately | |
# use the degree() function and options for calculating directed degree | |
# see the documentation here: http://igraph.sourceforge.net/documentation.html | |
# see which nodes have the max out and indegree | |
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so: | |
#g.nodes()[od.index(max(od))] | |
# find undirected betweenness scores and then nodes with the max betweenness | |
# warning, can be slow with large graphs, you may consider k= instead | |
ug = g.to_undirected() | |
bb = nx.betweenness_centrality(ug) | |
maxbbnode = g.nodes()[bb.index(max(bb))] | |
print 'Max betweenness: ', maxbbnode | |
# this high betweennes node may seem a bit surprising | |
# you can check out its neighbors like this. | |
print 'Neighbors of max betweenness: ', g.neighbors(maxbbnode) | |
# calculate Page Rank and find the node having the highest pagerank | |
# you'll want the $vector portion of the answer returned | |
# the assignment doesn't ask about this, but it's good to know how to do this... | |
import operator as op | |
pr = nx.pagerank(g) | |
maxprnode = max(pr.iteritems(), key=op.itemgetter(1))[0] | |
print 'Max Page Rank: ', maxprnode | |
# calculate the Bonacich alpha-centrality of a lattice | |
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense | |
# to me for these values of alpha | |
glfour = nx.grid_2d_graph(4,4) | |
#unfortunately netwrokx doesn't implement alpha centrality | |
#ac = nx.alpha_centrality(glfour, alpha=-0.5) | |
acw = map(lambda x: ac[x], glfour.nodes()) | |
ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw) | |
nx.draw_networkx(glfour, node_size=ac_node_size) | |
#ac = nx.alpha_centrality(glfour, alpha=+0.25) | |
acw = map(lambda x: ac[x], glfour.nodes()) | |
ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw) | |
nx.draw_networkx(glfour, node_size=ac_node_size) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment