Chapter 9 Do It Yourself
library(readr)
library(ggplot2)
library(ggraph)
library(igraph)
library(tidygraph)
library(tidyverse)
9.1 Eugenio Petrovich
Read-in data:
<- read_csv2("https://stulp.gmw.rug.nl/patio/Author_ackgees_homophily.csv")
eugenio head(eugenio)
## # A tibble: 6 × 6
## ID Sender Sender_gender Receiver Receiver_gender Weight
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 1 Sarah Moss female Sam Carter male 1
## 2 2 Sarah Moss female Keith DeRose male 1
## 3 3 Sarah Moss female Ben Holguin male 1
## 4 4 Sarah Moss female Ofra Magidor female 1
## 5 5 Sarah Moss female Daniel Rothschild male 1
## 6 6 Sarah Moss female Julia Staffel female 1
The edge list and node attributes are combined. Let’s seperate them first.
<- eugenio %>%
eugenio_edges select(Sender, Receiver, Weight) %>%
rename(from = "Sender",
to = "Receiver")
head(eugenio_edges)
## # A tibble: 6 × 3
## from to Weight
## <chr> <chr> <dbl>
## 1 Sarah Moss Sam Carter 1
## 2 Sarah Moss Keith DeRose 1
## 3 Sarah Moss Ben Holguin 1
## 4 Sarah Moss Ofra Magidor 1
## 5 Sarah Moss Daniel Rothschild 1
## 6 Sarah Moss Julia Staffel 1
# number of unique names in sender + gender of sender
<- eugenio %>%
sender_unique select(Sender, Sender_gender) %>%
rename(person = "Sender",
gender = "Sender_gender") %>% # rename variable for combining datasets
unique() # select only unique rows
<- eugenio %>%
receiver_unique select(Receiver, Receiver_gender) %>%
rename(person = "Receiver",
gender = "Receiver_gender") %>% # rename variable for combining datasets
unique() # select only unique rows
<- bind_rows(sender_unique, receiver_unique) %>% unique()
eugenio_nodes head(eugenio_nodes)
## # A tibble: 6 × 2
## person gender
## <chr> <chr>
## 1 Sarah Moss female
## 2 Colin Chamberlain male
## 3 Maegan Fairchild female
## 4 Ram Neta male
## 5 Matthew Mandelkern male
## 6 Jacob M. Nebel male
<- tbl_graph(nodes = eugenio_nodes, edges = eugenio_edges, directed = TRUE)
eugenio_nw
eugenio_nw
## # A tbl_graph: 5909 nodes and 18922 edges
## #
## # A directed multigraph with 27 components
## #
## # Node Data: 5,909 × 2 (active)
## person gender
## <chr> <chr>
## 1 Sarah Moss female
## 2 Colin Chamberlain male
## 3 Maegan Fairchild female
## 4 Ram Neta male
## 5 Matthew Mandelkern male
## 6 Jacob M. Nebel male
## # … with 5,903 more rows
## #
## # Edge Data: 18,922 × 3
## from to Weight
## <int> <int> <dbl>
## 1 1 1266 1
## 2 1 1267 1
## 3 1 1268 1
## # … with 18,919 more rows
Let’s calculate some clusters
<- eugenio_nw %>%
eugenio_nw activate(nodes) %>%
mutate(clusters = group_infomap()) %>%
group_by(clusters) %>%
mutate(cluster_size = n()) %>% # determine cluster size
ungroup()
It takes llloooonnnnggggg, and the result is not too insightful.
ggraph(eugenio_nw, layout = "mds") +
geom_edge_link(alpha = 0.1) +
geom_node_point(aes(colour = factor(clusters)) )+
theme_graph() +
scale_colour_viridis_d() +
theme(legend.position = "none")
Because it takes so long, I decided to select only the two largest clusters.
# Retrieve 2 largest clusters
<- eugenio_nw %>%
top_2 activate(nodes) %>%
as_tibble() %>%
arrange(desc(cluster_size)) %>%
select(cluster_size) %>%
unique() %>%
slice(1:2) %>%
pull(cluster_size)
# select tidygraph object with only two largest clusters
<- eugenio_nw %>%
eugenio_nw_top2 activate(nodes) %>%
filter(cluster_size %in% top_2)
ggraph(eugenio_nw_top2, layout = "kk") +
geom_edge_link(alpha = 0.1) +
geom_node_point(aes(colour = factor(clusters), shape = gender)) +
theme_graph()
9.2 Christina Prell
# adjacency matrix
<- read_csv("https://stulp.gmw.rug.nl/patio/full_100_2SDstrong_asymetric.csv") %>%
prell_adj column_to_rownames(var = "...1") # turn column into rownames
<- read.table("https://stulp.gmw.rug.nl/patio//Attrib_image.txt", header = TRUE, sep = "\t") %>%
prell_nodes rename(organisation = "X")
# Combine adjacancy matrix and node attributes
<- as_tbl_graph(prell_adj) %>%
prell_nw activate(nodes) %>%
left_join(prell_nodes, by = c("name" = "organisation"))
prell_nw
## # A tbl_graph: 100 nodes and 881 edges
## #
## # An undirected multigraph with 51 components
## #
## # Node Data: 100 × 7 (active)
## name Mode_n ORG_TYPE Vag_ord indeg_both OutDeg_org Indeg_org
## <chr> <int> <int> <int> <int> <int> <int>
## 1 54events.nl 1 2 2 1 1 1
## 2 allesoverwindenergie.… 1 2 7 1 0 1
## 3 amnesty.org 1 1 6 0 0 0
## 4 anteagroup.nl 1 2 5 0 0 0
## 5 apple.com 1 2 3 0 0 0
## 6 beccuijk.nl 1 2 1 1 0 1
## # … with 94 more rows
## #
## # Edge Data: 881 × 3
## from to weight
## <int> <int> <dbl>
## 1 1 1 1
## 2 4 4 5
## 3 4 8 4
## # … with 878 more rows
Not extremely useful…
# Create bipartite
<- prell_nw %>%
prell_nw activate(nodes) %>%
mutate(is_0_type = ORG_TYPE == 0,
lvl = ORG_TYPE)
ggraph(prell_nw, layout = "fr") +
geom_edge_link() +
geom_node_point(aes(colour = factor(ORG_TYPE))) +
theme_graph()
Christina: you’re question was not too easy, here are three potential answers to your question, we can see if we can make it work:
9.3 Annie Kok
(data not available)
library(readxl)
<- read_csv("data/afterrobberyedgelist.csv") %>%
edges_kok rename(from = "sender",
to = "receiver") %>%
mutate(location_num = as.numeric(factor(Location)))
<- as_tbl_graph(edges_kok)
nw_kok
nw_kok
## # A tbl_graph: 142 nodes and 323 edges
## #
## # A directed multigraph with 5 components
## #
## # Node Data: 142 × 1 (active)
## name
## <chr>
## 1 Acc_4
## 2 Acc_17
## 3 Unk_8
## 4 Acc_16
## 5 Unk_65
## 6 Unk_97
## # … with 136 more rows
## #
## # Edge Data: 323 × 7
## from to DateTime Location event datetime location_num
## <int> <int> <dbl> <chr> <chr> <chr> <dbl>
## 1 1 82 1159780008 01-RB_Lighthouse-0 after_9 2006/10/02 09:… 2
## 2 1 82 1159812248 01-RB_Lighthouse-0 after_9 2006/10/02 18:… 2
## 3 2 83 1159813735 01-RB_Lighthouse-0 after_9 2006/10/02 18:… 2
## # … with 320 more rows
ggraph(nw_kok, layout = "stress") +
geom_edge_link(aes(colour = factor(location_num))) +
geom_node_point() +
theme_graph() +
theme(
legend.position = "none"
)