Last active
June 14, 2018 21:56
-
-
Save Gooseus/19fcd75b47cd13cb83fe892d9f9515c9 to your computer and use it in GitHub Desktop.
Updated, Adapted and Commented Simulation Code for "Homophily and Contagion Are Generically Confounded in Observational Social Network Studies"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adapted from http://www.stat.cmu.edu/~cshalizi/homophily-confounding/asymmetry-act.R | |
# for R version 3.4.4 (2018-03-15) | |
# Simulate the asymmetric issue. | |
asymmetry.sim <- function (num.nodes=400, | |
scale=3, | |
offset=0, | |
y.noise=0.02, | |
friend.noms=1, | |
response="linear", | |
nominate.by="distance", | |
time.trend=0.3 | |
) { | |
#num.nodes=400; scale=3; offset=0; y.noise=0.02; response="not-linear"; nominate.by="not-distance";friend.noms=1; time.trend=0.3 | |
#covariates. | |
# define a logit^-1 function | |
invlogit <- function(cc) exp(cc)/(1+exp(cc)) | |
# get a vector of as many random values as we have nodes | |
xx <- runif(num.nodes) | |
# create a distance matrix from our random vector | |
distances <- as.matrix(dist(xx, diag=TRUE, upper=TRUE)) | |
#potential friends. | |
adj.probs <- array(rbinom(length(distances), 1, invlogit(offset-scale*distances)), dim(distances)) | |
diag(adj.probs) <- 0 | |
adj.probs[lower.tri(adj.probs)] <- t(adj.probs)[lower.tri(adj.probs)] | |
fixed.x.dist <- distances | |
diag(fixed.x.dist) <- Inf | |
nominees <- t(rbind(sapply(1:num.nodes, FUN=function(ii) { | |
possibles <- which(adj.probs[ii,] == 1) | |
return(ifelse(rep(nominate.by=="distance",friend.noms), | |
possibles[which(order(fixed.x.dist[ii,possibles])<=friend.noms)], | |
sample(possibles, size=friend.noms, prob=invlogit(-abs((xx[possibles] - 0.5)))) | |
)) | |
}))) | |
#num.nodes-by-friend.nom should be the size of the object. | |
#true adjacency matrix. | |
aa.mat <- array(0, dim(distances)) | |
for (ii in 1:num.nodes) aa.mat[ii, nominees[ii,]] <- 1 | |
#backwards matrix. | |
rev.aa.mat <- t(aa.mat) | |
#reciprocated matrix. | |
#recip.aa.mat <- aa.mat*rev.aa.mat | |
y1 = (xx-0.5)^3+rnorm(num.nodes,0,y.noise) | |
y2 = y1+rnorm(num.nodes,time.trend*xx,y.noise) | |
infl.y1 <- aa.mat%*%y1 | |
back.y1 <- rev.aa.mat%*%y1 | |
#recip.y1 <- recip.aa.mat%*%y1 | |
XX <- cbind(1, y1, infl.y1, back.y1) | |
trial <- lm(y2 ~ y1 + infl.y1 + back.y1) | |
#summary(trial) | |
coef.table <- summary(trial)$coefficients[,1] | |
v.plus.c <- c(diag(summary(trial)$cov.unscaled), summary(trial)$cov.unscaled[3,4])*summary(trial)$sigma^2 | |
#print(summary(trial)$cov.unscaled) | |
z.stat <- (coef.table[3]-coef.table[4])/sqrt(v.plus.c[3]+v.plus.c[4]+2*v.plus.c[5]) | |
out <- cbind(c(coef.table, v.plus.c, z.stat)) | |
rownames(out) <- c("int.b", "auto.b", "infl.b", "back.b", | |
"int.s", "auto.s", "infl.s", "back.s", | |
"cov.infl.back", | |
"z.stat") | |
return(out) | |
} | |
result <- replicate(5000, asymmetry.sim(friend=1, time.trend=0.3)) | |
#result <- replicate(50, asymmetry.sim(friend=1, time.trend=0.3)) | |
dim(result) | |
pdf("timeseriesmodel-act-5000.pdf", width=12, height=6) | |
par(mfrow=c(2,2)) | |
hist(result[3,1,], | |
main="Effect of Phantom `Influencer' on `Influenced' in Time Series", | |
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[3,1,]),digits=4))) | |
hist(result[2,1,], | |
main="Mutual Effect of Phantom Influence", | |
xlab=paste("Regression Coefficient Sum, Mean =",signif(mean(result[2,1,]),digits=4))) | |
hist(result[4,1,], | |
main="Effect of Phantom `Influenced' on `Influencer' in Time Series", | |
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[4,1,]),digits=4))) | |
hist(result[10,1,], | |
main="Directional Difference of Phantom Influence", | |
xlab=paste("Coefficient Difference, Mean =",signif(mean(result[10,1,]),digits=4),", Prop>0:", signif(mean(result[10,1,]>0),digits=4))) | |
dev.off() | |
par(mfrow=c(2,2)) | |
hist(result[3,1,], | |
main="Effect of Phantom `Influencer' on `Influenced' in Time Series", | |
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[3,1,]),digits=4))) | |
hist(result[1,1,], | |
main="Mutual Effect of Phantom Influence", | |
xlab=paste("Regression Coefficient Sum, Mean =",signif(mean(result[1,1,]),digits=4))) | |
hist(result[4,1,], | |
main="Effect of Phantom `Influenced' on `Influencer' in Time Series", | |
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[4,1,]),digits=4))) | |
hist(result[10,1,], | |
main="Directional Difference of Phantom Influence", | |
xlab=paste("Coefficient Difference, Mean =",signif(mean(result[10,1,]),digits=4),", Prop>0:", signif(mean(result[10,1,]>0),digits=4))) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adapted from http://www.stat.cmu.edu/~cshalizi/homophily-confounding/neutral-diffusion-act.R | |
# for R version 3.4.4 (2018-03-15) | |
#ACT's attempt at the voter model diffusion problem. | |
# Adapted for use with igraph instead of electrograph | |
# also updated to fix some bugs and to fix some pieces missing from the original paper | |
# load igraph for graph based stuff | |
library(igraph) # version 1.2.0 | |
# a layout which should increase the distance between nodes for prettier graphs | |
layout.by.attr <- function(graph, wc, cluster.strength=1,layout=layout.auto) { | |
g <- graph.edgelist(get.edgelist(graph)) # create a lightweight copy of graph w/o the attributes. | |
E(g)$weight <- 1 | |
attr <- cbind(id=1:vcount(g), val=wc) | |
g <- g + vertices(unique(attr[,2])) + igraph::edges(unlist(t(attr)), weight=cluster.strength) | |
l <- layout(g, weights=E(g)$weight)[1:vcount(graph),] | |
return(l) | |
} | |
# function create.network -- Create the toy network model... From page 13 of paper - https://arxiv.org/pdf/1004.4704.pdf | |
# We work with what is, frankly, a toy model of contagion (though, see footnote 13). | |
# There are n individuals connected in an undirected social network. | |
# Each individual i has an observed trait Xi, which is an unchanging variable; in our examples this will be binary. | |
# The network is homoophilous on this trait, so that individual with the same value of X are more likely to be connected. | |
# Individuals also have a time-varying choice, variable Yi(t), which again we will take to be binary. | |
# The initial choices, Yi(0), are set by flipping a fair coin (i.e. an unbiased Bernoulli process), and are therefore independent of Xi. | |
create.network <- function(nodes=100, # n individuals | |
homoph.factor=1, # factor of homophily, not used, assumed to be 1 | |
baseline.density=0.01, # the baseline probability of a connection | |
same.boost=0.05 # the boost to probability for sharing the homophilous trait | |
){ | |
# unbiased Bernoulli process to create vector of 0 or 1, the binary, unchanging, observed trait Xi | |
prop <- rbinom(nodes, 1, 0.5) | |
# generates a matrix of probabilities based between the homophilous trait Xi - 0.06 for sharing vs 0.01 for not | |
prob.matrix <- 1*(array(prop,c(nodes,nodes)) == t(array(prop,c(nodes,nodes))))*same.boost + baseline.density | |
# keep trying to make graphs using this method until one is created that is connected | |
connected = FALSE | |
while(!connected) { | |
# create the network adjacency matrix | |
# first pull an nodes^2 array of 0 or 1 values from a binomial distribution using the probability matrix | |
# and map to nxn array (matrix equivalent) | |
sociomat <- array(rbinom(length(prob.matrix), 1, prob.matrix), dim(prob.matrix)) | |
# transpose this array and map to boolean while adding to itself and remap to integer | |
# this ensures that the upper-right triangle is mapped to the lower-left for an undirected network | |
# but it does not seem to work... | |
# probably because it also transposes the bottom-left to the top-right... this double mirrors? | |
sociomat <- 1*(sociomat+t(sociomat)>0) | |
# set the diagonal equal to 0 to eliminate self-relationships in network | |
diag(sociomat) <- 0 | |
fix = graph_from_adjacency_matrix(sociomat) | |
connected = is.connected(fix) | |
} | |
sociomat <- as_adjacency_matrix(as.undirected(fix,mode="collapse"), sparse=FALSE) | |
# return both the adjacency matrix and the vector of Xi traits for individuals | |
return(list(socio=sociomat, traits=prop)) | |
} | |
# 13: Notice that the expected value of Yi_t(t + 1) is just the mean of Yj(t) for the j neighboring i_t. | |
# The expected value of Yi(t + 1) for all i is thus a weighted average of Yi(t) and the mean of their neighbors. | |
# At the level of expectations, then, this process belongs to the family of linear social influence models | |
# used in, e.g., Friedkin (1998). | |
# Choices evolve as follows: | |
# At each time t, we pick an individual It, uniformly at random from i ∈ { 1,...,n }, independently of all prior events. | |
# This individual then picks a neighbor, again uniformly at random, Jt ∈ { j : Ai_tj = 1 }, and either, | |
# with very high probability, copies their choice, so that Yi_t(t) = Yj_t(t-1), or, | |
# with very low probability, assumes the opposite choice, for Yi_t(t) = 1 - Yj_t(t-1); | |
# all other individuals retain their previous choices. | |
# This process repeats for each time step. | |
evolve.network <- function(sociomatrix, # our adjacency matrix | |
choice=rbinom(dim(sociomatrix)[1], 1, 0.5), # our initial choice matrix, Yi(0), from a Bernoulli process | |
save.points=1000, # number of points where we'll save our evolving choice matrix | |
iterations.per.save=10, # how many iterations between saving the choice matrix | |
prob.of.match = 0.85, # the probability that a pick with match their friend | |
prob.of.opp = 0.1 # the probability that a pick with match their friend | |
){ | |
# number of nodes (default=100) | |
nn <- dim(sociomatrix)[1] | |
# creat an uninitialized array of 100,1000 (defaults) | |
choice.out <- array(NA, c(nn, save.points)) | |
for (kk in 1:(save.points*iterations.per.save)) { | |
# pick individual It, uniformily at random from i ∈ { 1,...,n }, independently of all prior events. | |
pick <- sample(nn, 1) | |
# find the friends of our pick | |
friends = which(sociomatrix[pick,]==1) | |
# randomly select a friend and get their choice | |
friend.choice = sample(choice[friends],1) | |
# with high probability adopt their choice, with low probability adopt the opposite? | |
# original code does not have the opposite choice... | |
#choice[pick] <- sample(choice[friends],1) | |
if(prob.of.match>runif(1,0.0,1.0)) { | |
choice[pick] <- friend.choice | |
} else { | |
if(prob.of.opp>runif(1,0.0,1.0)) { | |
choice[pick] <- 1 - friend.choice | |
} | |
} | |
if (kk %% iterations.per.save == 0) { | |
# every iterations.per.save (10) iterations, save the evolving choice matrix | |
choice.out[,kk/iterations.per.save] <- choice | |
} | |
} | |
# return the array of choice matrices | |
return(choice.out) | |
} | |
# create the test network with a baseline density of 0.002 | |
nettest <- create.network(baseline.density=0.002) | |
# create the network graph | |
plot.obj <- graph_from_adjacency_matrix(nettest$socio, mode="undirected", diag=FALSE) | |
# save the initial positions | |
output.traits <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=3+nettest$traits,vertex.label=NA, vertex.size=5, | |
main="Network showing homophilous traits") | |
# plot the data with Xi trait colors | |
#plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
# vertex.color=1+nettest$traits,vertex.label=NA, vertex.size=5, | |
# main="Network showing homophilous traits") | |
#output.traits | |
# save our current data, for parania I suppose | |
save(nettest, plot.obj, output.traits, file="dont-lose-this.RData") | |
# create our initial choice vector from another fair binomial distribution | |
initial.choice <- rbinom(dim(nettest$socio)[1], 1, 0.5) | |
output.initial <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=1+initial.choice, vertex.label=NA, vertex.size=5, | |
main="Network showing initial choices") | |
#output.initial | |
# plot to PDF with colors indicating initial choice | |
pdf("diffusion-initial-act.pdf", width=10, height=6) | |
plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=1+initial.choice, vertex.label=NA, vertex.size=5, | |
main="Network showing initial choices") | |
dev.off() # turn off the PDF device | |
# evolve our choices | |
choice.prog <- evolve.network(nettest$socio, choice=initial.choice) | |
output.midway <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=1+choice.prog[,297], vertex.label=NA, vertex.size=5, | |
main="Network showing choices at midway") | |
# plot midway choices to PDF | |
pdf("diffusion-midway-act.pdf", width=10, height=6) | |
plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=1+choice.prog[,297], vertex.label=NA, vertex.size=5, | |
main="Network showing choices midway through simulation") | |
dev.off() # turn off the PDF device | |
output.final <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=1+choice.prog[,1000], vertex.label=NA, vertex.size=5, | |
main="Network showing choices at end") | |
# plot the last choices to PDF | |
pdf("diffusion-final-act.pdf", width=10, height=6) | |
plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1), | |
vertex.color=1+choice.prog[,1000], vertex.label=NA, vertex.size=5, | |
main="Network showing choices at end of simulation") | |
dev.off() # turn off the PDF device | |
par(mfrow=c(2,2)) | |
output.traits | |
output.initial | |
output.midway | |
output.final | |
# Create a test network of the same density with no homophily | |
#dens <- mean(nettest$socio) | |
dens <- edge_density(plot.obj) | |
connected = FALSE | |
while(!connected) { | |
test.zero <- array(rbinom(length(nettest$socio), 1, dens/2), dim(nettest$socio)) | |
test.zero <- test.zero + t(test.zero) | |
# we need to also fix our test.zero plot, maybe | |
test.g = graph_from_adjacency_matrix(test.zero, mode="undirected", diag=FALSE) | |
connected = is.connected(test.g) | |
} | |
if(is.directed(test.g)) { | |
test.g = as.undirected(test.g,mode="collapse") | |
test.zero = as_adjacency_matrix(test.g, sparse=FALSE) | |
} | |
# evolve the neutral test network with the same initial choices | |
choice.zero <- evolve.network(test.zero, choice=initial.choice) | |
# find the effect of our homophilous network and the test network | |
prog.effect <- zero.effect <- array(NA, c(2,dim(choice.prog)[2])) | |
for (kk in 1:dim(choice.prog)[2]) { | |
res.e <- glm(choice.prog[,kk] ~ nettest$trait, family=binomial(link=logit)) | |
prog.effect[,kk] <- summary(res.e)$coef[2,1:2] | |
res.z <- glm(choice.zero[,kk] ~ nettest$trait, family=binomial(link=logit)) | |
zero.effect[,kk] <- summary(res.z)$coef[2,1:2] | |
} | |
prog.e <- prog.effect[,1:100] | |
zero.e <- zero.effect[,1:100] | |
lims <- range(c(prog.e[1,]-2*prog.e[2,], prog.e[1,]+2*prog.e[2,], | |
zero.e[1,]-2*zero.e[2,], zero.e[1,]+2*zero.e[2,])) | |
# plot the correlation of the homophilous network to PDF | |
pdf("correlation-homoph.pdf", width=10, height=5) | |
plot(c(0,1000), lims, ty="n", main="Homophilous Network", xlab="Step", ylab="Effect Size") | |
x.seq <- seq(10, 1000, by=10) | |
points(x.seq, prog.e[1,], col=2, pch=19) | |
segments(x.seq, prog.e[1,]-2*prog.e[2,], x.seq, prog.e[1,]+2*prog.e[2,], col=2) | |
abline(h=0, col=4) | |
dev.off() | |
# plot the correlation of the neutral network to PDF | |
pdf("correlation-neutral.pdf", width=10, height=5) | |
plot(c(0,1000), lims, ty="n", main="Neutral-Model Network", xlab="Step", ylab="Effect Size") | |
points(x.seq, zero.e[1,], col=1, pch=19) | |
segments(x.seq, zero.e[1,]-2*zero.e[2,], x.seq, zero.e[1,]+2*zero.e[2,], col=1) | |
abline(h=0, col=4) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment