Last active
August 8, 2017 18:38
-
-
Save rvanbruggen/0174e5d86cf9553657470e63565dce96 to your computer and use it in GitHub Desktop.
Importing and querying the web of Belgian Public companies and their ceo's/chairmen
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Importing from the Google Spreadsheet | |
//import the Person nodes | |
load csv with headers from | |
"https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=0" as persons | |
create (n:Node:Person) | |
set n = persons; | |
//import the Company nodes | |
load csv with headers from | |
"https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=2040965723" as companies | |
create (n:Node:Company) | |
set n = companies; | |
///////////////////////////////////////////// | |
//IMPORTING THE RELATIONSHIPS: 2 ALTERNATIVES | |
///////////////////////////////////////////// | |
//IF YOU WANT GENERIC RELATIONSHIP TYPE AND JUST WANT TO USE STANDARD CYPHER, USE THIS | |
//import the relationships | |
load csv with headers from "https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=773066509" as csv | |
match (source:Node {ID: csv.source}), (target:Node {ID: csv.target}) | |
create (source)-[:RELATED_TO {type: csv.mandate}]->(target); | |
//IF YOU WANT SPECIFIC RELATIONSHIP TYPES AND HAVE APOCs ENABLED, USE THIS | |
//specific rels | |
load csv with headers from "https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=773066509" as csv | |
match (source:Node {ID: csv.source}), (target:Node {ID: csv.target}) | |
CALL apoc.create.relationship(source,csv.mandate,{},target) yield rel | |
return count(*); | |
//Colour males and females with new labels | |
match (n:Person) | |
where n.gender = "M" | |
set n:Male; | |
match (n:Person) | |
where n.gender = "V" | |
set n:Female; | |
//remove the old labels | |
match (n:Node) | |
remove n:Node; | |
//create the INDEXES | |
create index on :Male(name); | |
create index on :Female(name); | |
create index on :Company(name); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//import from original files | |
//import the Person nodes | |
load csv with headers from | |
"http://multimedia.tijd.be/bestuurders/data/nodesdef.csv" as nodes | |
create (n:Node) | |
set n = nodes; | |
//import the relationships | |
load csv with headers from "http://multimedia.tijd.be/bestuurders/data/linksdef.csv" as rels | |
match (source:Node {ID: rels.source}), (target:Node {ID: rels.target}) | |
create (source)-[:RELATED_TO {type: rels.mandate}]->(target); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Queries on the Belgian Public Company Dataset | |
// degree of the Persons | |
match (p:Person) | |
return p.name, size( (p)--() ) as degree | |
order by degree desc | |
limit 10 | |
//3-hop network around Luc Bertrand | |
match path = (m:Male)-[r*..3]-(n) | |
where m.name contains "Bertrand" | |
return path | |
//explore the links between highly connected nodes | |
//between Philippe Vlerick and Luc Bertrand | |
match (vlerick:Person {name:"Philippe Vlerick"}), (bertrand:Person {name:"Luc Bertrand"}), | |
path = allshortestpaths ((vlerick)-[*]-(bertrand)) | |
return path; | |
//Between Bert De Graeve and Luc Bertrand | |
match (degraeve:Person {name:"Bert De Graeve"}), (bertrand:Person {name:"Luc Bertrand"}), | |
path = allshortestpaths ((degraeve)-[*]-(bertrand)) | |
return path; | |
//Between Bert De Graeve and Frank Donck | |
match (degraeve:Person {name:"Bert De Graeve"}), (donck:Person {name:"Frank Donck"}), | |
path = allshortestpaths ((degraeve)-[*]-(donck)) | |
return path; | |
//explore some links between companies | |
match (kbc:Company {name:"KBC"}), (li:Company {name:"AB INBEV"}), | |
path = allshortestpaths ((kbc)-[*]-(li)) | |
return path; | |
//what is the maximum diameter of the graph | |
//maximum diameter as text | |
MATCH (a:Person), (b:Person) WHERE id(a) > id(b) | |
MATCH p=shortestPath((a)-[:RELATED_TO*]-(b)) | |
with length(p) AS len, extract(x IN nodes(p) | x.name) AS path | |
ORDER BY len DESC LIMIT 1 | |
return path | |
//maximum diameter as a graph | |
MATCH (a:Person), (b:Person) WHERE id(a) > id(b) | |
MATCH p=shortestPath((a)-[:RELATED_TO*]-(b)) | |
with length(p) AS len, p | |
ORDER BY len DESC LIMIT 1 | |
return p | |
//explore some graph algos with APOC | |
//algos | |
//betweenness centrality | |
MATCH (node:Person) | |
WHERE id(node) %2 = 0 | |
WITH collect(node) AS nodes | |
CALL apoc.algo.betweenness(['RELATED_TO'],nodes,'BOTH') YIELD node, score | |
RETURN node.name, score | |
ORDER BY score DESC | |
//closeness centrality | |
MATCH (node:Person) | |
WHERE id(node) %2 = 0 | |
WITH collect(node) AS nodes | |
CALL apoc.algo.closeness(['RELATED_TO'],nodes,'INCOMING') YIELD node, score | |
RETURN node.name, score | |
ORDER BY score DESC | |
//pageRank for Companies | |
MATCH (node:Company) | |
WHERE id(node) %2 = 0 | |
WITH collect(node) AS nodes | |
// compute over relationships of all types | |
CALL apoc.algo.pageRank(nodes) YIELD node, score | |
RETURN node.name, score | |
ORDER BY score DESC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment