weaselj · October 13, 2012 21:00
diff --git a/OPA2centralityTemplateIGraph.py b/OPA2centralityTemplateIGraph.py
 # Coursera SNA optional Programming Assignment 2 template

 # load the igraph library
 # you may have to install this module if you haven't already
 import igraph as ig
 import numpy as np

 # read in the graph in GML format
 # it is a sampled collection of pages from a strange set of seed categories:
 # Math, Sociology, and Chemistry
 # Change this to be your local file location
 #  if you are using Windows, replace the \ in the path with a double \, e.g.
 # g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")

 g = ig.read('wikipedia.gml',format='gml')

 # obtain summary information about the graph
 print g.summary()

 # obtain the undirected degree distribution (the GML file itself is directed)
 degrees = g.degree()

 # fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells 
 # us that the distribution is power-law.
 # Also, look for the estimated power-law exponent $alpha and $xmin (the point at 
 # which you should start fitting the distribution)
 # make sure you have executed all the code in plfit.R before running this function
 # an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
 # you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
 from plfit import plfit
 a, xmin, L = plfit(degrees)
 print 'Power-Law fit - alpha:',  a, ' x-min: ', xmin, ' log-likelihood: ', L

 # plot the cumulative empirical distribution
 y = map(degrees.count, range(max(degrees)+1))
 x = range(len(y))

 cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)

 import matplotlib.pyplot as plt

 plt.loglog(x,cumy, lw=2)	
 plt.xlabel("degree k")
 plt.xlabel("P(x) >= k")

 # overlay the fitted distribution
 startval = cumy[xmin]
 xfit = range(xmin, max(x))
 fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
 #fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
 plt.loglog(xfit,fittedvals, 'r--', lw=4)

 # calculate the in and out degrees separately
 # use the degree() function and options for calculating directed degree
 # see the documentation here: http://igraph.sourceforge.net/documentation.html

 # see which nodes have the max out and indegree
 # for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
 #maxodnode = g.vs[od.index(max(od))]

 # find undirected betweenness scores and then nodes with the max betweenness
 # warning, can be slow with large graphs, you may consider cutoff= instead
 bb = g.betweenness(directed=False, cutoff=5)
 maxbbnode = g.vs[bb.index(max(bb))]
 print 'Max betweenness: ', maxbbnode['label']

 # this high betweennes node may seem a bit surprising
 # you can check out its neighbors like this.
 print 'Neighbors of max betweenness: ', g.vs[g.neighborhood(maxbbnode)]['label']

 # calculate Page Rank and find the node having the highest pagerank
 # you'll want the $vector portion of the answer returned
 # the assignment doesn't ask about this, but it's good to know how to do this...
 pr = g.pagerank()
 maxprnode = g.vs[pr.index(max(pr))]
 print 'Max Page Rank: ', maxprnode

 # calculate the Bonacich alpha-centrality of a lattice
 # I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
 # to me for these values of alpha
 glfour = ig.Graph.Lattice([4,4])
 #unfortunately igraph python doesn't implement alpha centrality either
 #ac = g.alpha_centrality(alpha=-0.5)
 ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
 ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)

 #ac = g.alpha_centrality(alpha=+0.25)
 ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
 ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)
diff --git a/OPA2centralityTemplateNetworkX.py b/OPA2centralityTemplateNetworkX.py
 # Coursera SNA optional Programming Assignment 2 template

 # load the igraph library
 # you may have to install this module if you haven't already
 import networkx as nx
 import numpy as np

 # read in the graph in GML format
 # it is a sampled collection of pages from a strange set of seed categories:
 # Math, Sociology, and Chemistry
 # Change this to be your local file location
 #  if you are using Windows, replace the \ in the path with a double \, e.g.
 # g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")

 g = nx.read_gml('wikipedia.gml',relabel=True)

 # obtain summary information about the graph
 print nx.info(g)

 # obtain the undirected degree distribution (the GML file itself is directed)
 degrees = g.degree().values()

 # fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells 
 # us that the distribution is power-law.
 # Also, look for the estimated power-law exponent $alpha and $xmin (the point at 
 # which you should start fitting the distribution)
 # make sure you have executed all the code in plfit.R before running this function
 # an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
 # you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
 from plfit import plfit
 a, xmin, L = plfit(degrees)
 print 'Power-Law fit - alpha:',  a, ' x-min: ', xmin, ' log-likelihood: ', L

 # plot the cumulative empirical distribution
 y = nx.degree_histogram(g)
 x = range(len(y))

 cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)

 import matplotlib.pyplot as plt

 plt.loglog(x,cumy, lw=2)	
 plt.xlabel("degree k")
 plt.xlabel("P(x) >= k")

 # overlay the fitted distribution
 startval = cumy[xmin]
 xfit = range(xmin, max(x))
 fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
 #fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
 plt.loglog(xfit,fittedvals, 'r--', lw=4)

 # calculate the in and out degrees separately
 # use the degree() function and options for calculating directed degree
 # see the documentation here: http://igraph.sourceforge.net/documentation.html

 # see which nodes have the max out and indegree
 # for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
 #g.nodes()[od.index(max(od))]

 # find undirected betweenness scores and then nodes with the max betweenness
 # warning, can be slow with large graphs, you may consider k= instead
 ug = g.to_undirected()
 bb = nx.betweenness_centrality(ug)
 maxbbnode = g.nodes()[bb.index(max(bb))]
 print 'Max betweenness: ', maxbbnode

 # this high betweennes node may seem a bit surprising
 # you can check out its neighbors like this.
 print 'Neighbors of max betweenness: ', g.neighbors(maxbbnode)

 # calculate Page Rank and find the node having the highest pagerank
 # you'll want the $vector portion of the answer returned
 # the assignment doesn't ask about this, but it's good to know how to do this...
 import operator as op
 pr = nx.pagerank(g)
 maxprnode = max(pr.iteritems(), key=op.itemgetter(1))[0]
 print 'Max Page Rank: ', maxprnode

 # calculate the Bonacich alpha-centrality of a lattice
 # I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
 # to me for these values of alpha
 glfour = nx.grid_2d_graph(4,4)
 #unfortunately netwrokx doesn't implement alpha centrality
 #ac = nx.alpha_centrality(glfour, alpha=-0.5)
 acw = map(lambda x: ac[x], glfour.nodes())
 ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw)
 nx.draw_networkx(glfour, node_size=ac_node_size)

 #ac = nx.alpha_centrality(glfour, alpha=+0.25)
 acw = map(lambda x: ac[x], glfour.nodes())
 ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw)
 nx.draw_networkx(glfour, node_size=ac_node_size)
	# Coursera SNA optional Programming Assignment 2 template

	# load the igraph library
	# you may have to install this module if you haven't already
	import igraph as ig
	import numpy as np

	# read in the graph in GML format
	# it is a sampled collection of pages from a strange set of seed categories:
	# Math, Sociology, and Chemistry
	# Change this to be your local file location
	# if you are using Windows, replace the \ in the path with a double \, e.g.
	# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")

	g = ig.read('wikipedia.gml',format='gml')

	# obtain summary information about the graph
	print g.summary()

	# obtain the undirected degree distribution (the GML file itself is directed)
	degrees = g.degree()

	# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
	# us that the distribution is power-law.
	# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
	# which you should start fitting the distribution)
	# make sure you have executed all the code in plfit.R before running this function
	# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
	# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
	from plfit import plfit
	a, xmin, L = plfit(degrees)
	print 'Power-Law fit - alpha:', a, ' x-min: ', xmin, ' log-likelihood: ', L

	# plot the cumulative empirical distribution
	y = map(degrees.count, range(max(degrees)+1))
	x = range(len(y))

	cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)

	import matplotlib.pyplot as plt

	plt.loglog(x,cumy, lw=2)
	plt.xlabel("degree k")
	plt.xlabel("P(x) >= k")

	# overlay the fitted distribution
	startval = cumy[xmin]
	xfit = range(xmin, max(x))
	fittedvals = map(lambda x:x*(-a+1)startval/xmin**(-a+1), xfit)
	#fittedvals = map(lambda x:x*(-a+1)startval/xmin**(-a+1), xfit)
	plt.loglog(xfit,fittedvals, 'r--', lw=4)

	# calculate the in and out degrees separately
	# use the degree() function and options for calculating directed degree
	# see the documentation here: http://igraph.sourceforge.net/documentation.html

	# see which nodes have the max out and indegree
	# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
	#maxodnode = g.vs[od.index(max(od))]

	# find undirected betweenness scores and then nodes with the max betweenness
	# warning, can be slow with large graphs, you may consider cutoff= instead
	bb = g.betweenness(directed=False, cutoff=5)
	maxbbnode = g.vs[bb.index(max(bb))]
	print 'Max betweenness: ', maxbbnode['label']

	# this high betweennes node may seem a bit surprising
	# you can check out its neighbors like this.
	print 'Neighbors of max betweenness: ', g.vs[g.neighborhood(maxbbnode)]['label']

	# calculate Page Rank and find the node having the highest pagerank
	# you'll want the $vector portion of the answer returned
	# the assignment doesn't ask about this, but it's good to know how to do this...
	pr = g.pagerank()
	maxprnode = g.vs[pr.index(max(pr))]
	print 'Max Page Rank: ', maxprnode

	# calculate the Bonacich alpha-centrality of a lattice
	# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
	# to me for these values of alpha
	glfour = ig.Graph.Lattice([4,4])
	#unfortunately igraph python doesn't implement alpha centrality either
	#ac = g.alpha_centrality(alpha=-0.5)
	ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
	ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)

	#ac = g.alpha_centrality(alpha=+0.25)
	ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
	ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)