## Install the following package
# install.packages ("relevent")
library (relevent)
library(sna)
library(ggplot2)
library(igraph)
library(reticulate) # this package is used to access python modules in R which requires python to be installed on the machine. If some modules are needed to execute functions, py_install("name of module") can be used to install the modules. 
library(RColorBrewer)

Welcome to the practical for social network analysis Part 2. In this part you will apply Relational Event Models (REMs) on real relational event history (REH) data: Apollo 13 voice loop data and Twitter data. These data sets are stored in UUsummerschool.rdata.

1. Relational event model (REM)

A reminder about REMs: dyadic REMs are intended to capture the behavior of systems in which individual social units (persons, organizations, animals, companies, countries, etc.) direct discrete actions towards other individuals in their environment, which form a social network.

As in the Part 1, first, we need to load UUsummerschool.rdata and see what is inside of it.

# Set your own working directory
# setwd()

# Load data using load():
load("UUsummerschool.rdata") 
# This R-data contains 11 objects:
ls()
## [1] "as.sociomatrix.eventlist" "Class"                   
## [3] "ClassIntercept"           "ClassIsFemale"           
## [5] "ClassIsTeacher"           "PartOfApollo_13"         
## [7] "Twitter_data_rem3"        "WTCPoliceCalls"          
## [9] "WTCPoliceIsICR"

let’s check the objects that we will work with in this practical.

as.sociomatrix.eventlist
## function (eventlist, n) 
## {
##     g <- matrix(0, n, n)
##     if (NROW(eventlist) > 0) {
##         tabmat <- table(eventlist[, -1, drop = FALSE])
##         g[as.numeric(dimnames(tabmat)[[1]]), as.numeric(dimnames(tabmat)[[2]])] <- tabmat
##     }
##     g
## }
## attr(,"source")
## [1] "function(eventlist,n){"                                                             
## [2] "  g<-matrix(0,n,n)"                                                                 
## [3] "  if(NROW(eventlist)>0){"                                                           
## [4] "    tabmat<-table(eventlist[,-1,drop=FALSE])"                                       
## [5] "    g[as.numeric(dimnames(tabmat)[[1]]), as.numeric(dimnames(tabmat)[[2]])]<-tabmat"
## [6] "  }"                                                                                
## [7] "  g"                                                                                
## [8] "}"
head(PartOfApollo_13)
##      time sender receiver
## 1 11849.2     18        2
## 2 11854.2      2       18
## 3 11885.2     18        2
## 4 11890.2      2       18
## 5 12232.2      2       17
## 6 12342.2     17        2
head(Twitter_data_rem3)
##   time_day source target
## 1     0.00      8      1
## 2   121.03     28      2
## 3   199.08     28      2
## 4   266.95      4      3
## 5   573.59     22      5
## 6   574.49     25      5

If you need a reminder on what a sociomatrix is, please refer to the Part 1 of REM practical.

2. Apollo 13 data & visualization, centrality measures, and community detection

During this practical we will analyze part of the Apollo 13 mission. The mission launched as scheduled at 2:13:00EST (19:13:00 UTC) on April 11, 1970. On board were James Lovell Jr. (Commander, CDR), John ”Jack” Swigert Jr. (Command Module Pilot, CMP), and Fred Haise Jr. (Lunar Module Pilot, LMP). The mission was quite routine and everything went to plan, except when at 56:54:53, the astronauts heard a ”pretty large bang” and experienced fluctuations in electrical power and control thrusters. This sets a series of events in motion. With oxygen levels depleting fast, the astronauts not only faced a risk of running out of oxygen to breathe. Therefore, they decided to abort the mission and come back to earth. This indeed changed their communications and interactions during the mission, as both the astronauts and mission control had to solve unexpected and urgent problems in order to bring the crew home alive.

The data come from the Apollo 13’s voice loops transcripts, obtained from http://apollo13realtime.org/ and https://history.nasa.gov/afj/ap13fj/07day3-before-the-storm.html; the data include the Flight directors’ voice loop and the air-ground’s voice loop. Flight directors (Houston’s Mission Control Center) were located in Houston and the crew (astronauts) were connected to this control center via Capsule Communicator (CAPCOM). The Apollo 13 data is an ideal benchmark data to study communication/interaction pattern. In this practical we use a part of the data, one hour before incident till one hour after that. The eventlist is stored in an object called PartOfApollo_13 and contain four columns: time, sender, receiver and message. Note that we know precisely when these calls were made. Therefore, we apply REM considering that the the exact event times are known. That should be notified that the number of actors is 19.

Question 1: Convert the Apollo 13 data eventlist into a sociomatrix.
Hint: as.sociomatrix.eventlist(data, number of actors).

ApolloNet <- as.sociomatrix.eventlist(PartOfApollo_13 ,19)

Question 2: Use graph_from_adjacency_matrix() to convert the Apollo 13 adjacency matrix into a graph object.
Hint: graph_from_adjacency_matrix(sociomatrix matrix).

# transforming Apollo 13 adjacency matrix into igraph bject
Apollo_graph <- graph_from_adjacency_matrix(ApolloNet)

Question 3: Assign corresponding numbers as vertex names.
Hint: You can set vertex attributes by using V(graph)$name_of_attribute.

# adding vertex attributes to the igraph object (useful for calculating centrality measures)
V(Apollo_graph)$name <- seq(1,19)

Question 4: Using igraph package visualize the Apollo 13 data using different layouts to see which layout is the most beneficial. First, use a circular layout.
Hint: plot(graph, layout=layout_in_circle). Due to the size of this network it could be beneficial to add weights to the edges to represent the frequency of occurrence of an edge. You can add weights to the edges by using E(your graph)$weight and assigning a 1 to every edge. Then, you need to use simplify(your graph) to collapse all repeated edges into single edges with weights. When you visualize the graph, you can add an argument edge.width=E(your graph)$weight and multiply it by a number (e.g., 0.05) to have the thickness of the edges represent the weights.

# Circular layout
plot(Apollo_graph, layout=layout_in_circle)
Circular Layout of the Apollo13 Network

Circular Layout of the Apollo13 Network

Now, try a Fruchterman and Reingold method.
Hint: plot(graph, layout=layout_with_fr).

# Fruchterman and Reingold method 
plot(Apollo_graph, layout=layout_with_fr)
Fruchterman and Reingold Layout of the  Apollo13 Network

Fruchterman and Reingold Layout of the Apollo13 Network

Also try the Kamada and Kawai method.
Hint: plot(graph, layout=layout_with_kk).

# Kamada and Kawai method
plot(Apollo_graph, layout=layout_with_kk)
Kamada and Kawai Layout of the Apollo13 Network

Kamada and Kawai Layout of the Apollo13 Network

# Due to the Apollo 13 data having a large number of edges we can add weights to represent the number of occurrences of an edge between the same pair of nodes to better distinguish the structure of the graph.  
E(Apollo_graph)$weight <- 1
Apollo_g2 <- simplify(Apollo_graph)

plot(Apollo_g2, 
     edge.width=E(Apollo_g2)$weight*0.05,
     layout=layout_in_circle)
Circular Layout of the Weighted Apollo13 Network

Circular Layout of the Weighted Apollo13 Network

Question 5: What are the size and order of this network?
Hint: gsize(graph), gorder(graph).

gsize(Apollo_graph)
## [1] 3882
gorder(Apollo_graph)
## [1] 19

Question 6: Create degree distributions of the Apollo 13 data.
Hint: degree(graph, mode=c("in", "out", "total")); ggplot(data, aes()) + geom_histogram() + labs(title="", x="", y="").

# Since we are working with directed graph we can create indegree and outdegree distributions:

# List of degrees
G.indegrees <- igraph::degree(Apollo_graph, mode = 'in')
G.outdegrees <- igraph::degree(Apollo_graph, mode = 'out')
G.total_degrees <- igraph::degree(Apollo_graph, mode = 'total')

# creating a dataframe to be able to work with ggplot
G.degree.histogram <- data.frame(G.indegrees, G.outdegrees, G.total_degrees)

# ploting indegree distribution
ggplot(G.degree.histogram, aes(x = G.indegrees)) +
  geom_histogram(binwidth = 10,
                   colour= "black", fill = "white") +
  labs(title="Indegree Distribution", x="Vertex Degree", y="Frequency") 

# ploting outdegree distribution
ggplot(G.degree.histogram, aes(x = G.outdegrees)) +
  geom_histogram(binwidth = 10,
                   colour= "black", fill = "white") +
  labs(title="Outdegree Distribution", x="Vertex Degree", y="Frequency") 

# ploting total degree distribution
ggplot(G.degree.histogram, aes(x = G.total_degrees)) +
  geom_histogram(binwidth = 15,
                   colour= "black", fill = "white") +
  labs(title="Total Degree Distribution", x="Vertex Degree", y="Frequency")

#hist(degree(Apollo_graph, mode = 'in'), col='lightblue', xlab='Vertex Degree',
     #ylab='Frequency', main='')
#hist(degree(Apollo_graph, mode = 'out'), col='blue', xlab='Vertex Degree', 
     #ylab='Frequency', main='')

The degree distributions show that many vertices have varying degrees with one node having noticeably higher in-, out-, and total degrees.

Question 7: Calculate centrality measures: degree, betweeness, and eigenvalue (closeness cannot be calculated due to the presence of isolated nodes). Extract nodes that score the highest on each centrality measure.
Hint: degree(graph, ..., mode=c("all", "out", "in", "total")); betweenness(graph, directed = TRUE, ...); eigen_centrality(graph, directed = TRUE, ...).

a) highest degree

in_degree_apollo <- igraph::degree(Apollo_graph, mode='in')
out_degree_apollo <- igraph::degree(Apollo_graph, mode='out')
total_degree_apollo <- igraph::degree(Apollo_graph, mode='total')

in_degree_apollo[in_degree_apollo == max(in_degree_apollo)]
##    2 
## 1162
out_degree_apollo[out_degree_apollo == max(out_degree_apollo)]
##    2 
## 1115
total_degree_apollo[total_degree_apollo == max(total_degree_apollo)]
##    2 
## 2277

We can see that node 2 is the most ‘important’ node in this network according to all three degree measures.

b) highest betweenness

between_apollo <- betweenness(Apollo_graph, directed = TRUE, normalized = TRUE)
between_apollo[between_apollo == max(between_apollo)]
##         7 
## 0.4249329

The node that occurs most often on the shortest path between a pair of other nodes is node 7.

c) highest eigenvector

eigen_apollo <- eigen_centrality(Apollo_graph, directed = TRUE)
## Warning in eigenvector_centrality_impl(graph = graph, directed = directed, : At
## vendor/cigraph/src/centrality/eigenvector.c:304 : Weighted directed graph in
## eigenvector centrality.
eigen_values <- eigen_apollo[1]
eigen_values <- unlist(eigen_values)
eigen_values[eigen_values == max(eigen_values)]
## vector.2 
##        1

The node with the most connected neighbors is node 2.

Question 8: Apply community detection algorithm on the Appollo 13 data. First, use naive approach. If you need a reminder on community detection, refer to Q11 of Part 1 of the REM practical.
Hint: as.undirected(graph, mode = c("collapse", "each", "mutual"); cluster_fast_greedy(graph).

# Converting digraph into undirected graph without multyedges:
undirected.Apollo <- as.undirected(
  Apollo_graph,
  mode = c("collapse"))

# use fast and greedy algorithm for hierarchical clustering 
auc <- cluster_fast_greedy(undirected.Apollo)

Use length() to check how many communities the algorithm has identified.

length(auc)
## [1] 5

Use sizes() to see how large those communities are.

sizes(auc)
## Community sizes
##  1  2  3  4  5 
## 12  4  1  1  1

Plot the results of the community detection using plot(communities, graph).

# plot clusters
plot(auc, undirected.Apollo)

Naive approach identified two main communities and the rest are just single isolated nodes. After plotting the results we can see that the largest community is clearly centered around node 7.

Create a dendrogram using dendPlot(communities, mode=‘phylo’).

# plot a dendrogram
dendPlot(auc, mode='phylo')
## Warning: `dendPlot()` was deprecated in igraph 2.0.0.
## ℹ Please use `plot_dendrogram()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

According to the dendrogram, the nodes in the clusters appear to be quite similar except nodes 4 and 7, which seem to differ from the rest of the nodes in their cluster. Looking at the graph plot we can see that it is likely related to the fact that nodes 4 and 7 are highly central in their cluster.

3. Project 1: Application of REM on Apollo 13 data

We are again going to use REMs for event histories with exact timing information.
Let’s consider Apollo 13 mission data. Read the description of the data at the beginning of this practical and also check the aforementioned websites for more information. In this case, event time is given in increments of seconds from the onset of observation.

In this data the actors are as follows:

  • AFD: Assistant Flight Director from Flight directors (1)

  • CAPCOM: Capsule Communicator from Flight directors (2)

  • CONTROL: Control Officer from Flight directors (3)

  • EECOM: Electrical, Environmental and Consumables Manager from Flight directors (4)

  • All : Ground control team (without flight directores) (5)

  • FDO : Flight dynamics officer (FDO or FIDO) (6)

  • FLIGHT: Flight Director from Flight directors (7)

  • GNC: The Guidance, Navigation, and Controls Systems Engineer from Flight directors (8)

  • GUIDO: Guidance Officer from Flight directors (9)

  • INCO: Integrated Communications Officer from Flight directors (10)

  • NETWORK: Network of ground stations from Flight directors (11)

  • TELMU: Telemetry, Electrical, and EVA Mobility Unit Officer from Flight directors (12)

  • RECOVERY: Recovery Supervisor from Flight directorsc (13)

  • PROCEDURES: Organization & Procedures Officer from Flight directors (14)

  • FAO: Flight activities officer from Flight directors (15)

  • RETRO: Retrofire Officer from Flight directors (16)

  • CDR: Commander James A. Lovell Jr. crew (astronauts) (17)

  • CMP: Command Module Pilot John (Jack) L. Swigert Jr. crew (astronauts) (18)

  • LMP: Lunar module pilot Fred W. Haise Jr. crew (astronauts) (19)

Question 9: First look at the data, plot the network and start with fitting a simple model. Next, add the statistics of interest to the model and see the performance of the new model.

gplot(ApolloNet, edge.lwd=ApolloNet^0.75, arrowhead.cex=
        log(as.edgelist.sna(ApolloNet)[,3])+.25,vertex.col="gray",
        vertex.cex=1.25)  
Apolo13 Network

Apolo13 Network

Apollo_mission1<-rem.dyad(PartOfApollo_13,n=19,effects=c("PSAB-BA"), ordinal=TRUE,hessian= TRUE)
## Prepping edgelist.
## Checking/prepping covariates.
## Computing preliminary statistics
## Fitting model
## Obtaining goodness-of-fit statistics
summary(Apollo_mission1)
## Relational Event Model (Ordinal Likelihood)
## 
##         Estimate  Std.Err Z value  Pr(>|z|)    
## PSAB-BA 6.453583 0.033668  191.69 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Null deviance: 45301.47 on 3882 degrees of freedom
## Residual deviance: 20850.3 on 3881 degrees of freedom
##  Chi-square: 24451.17 on 1 degrees of freedom, asymptotic p-value 0 
## AIC: 20852.3 AICC: 20852.3 BIC: 20858.56
Apollo_mission2<-rem.dyad(PartOfApollo_13,n=19,effects=c("PSAB-BA","RRecSnd"), ordinal=TRUE,hessian= TRUE)
## Prepping edgelist.
## Checking/prepping covariates.
## Computing preliminary statistics
## Fitting model
## Obtaining goodness-of-fit statistics
summary(Apollo_mission2)
## Relational Event Model (Ordinal Likelihood)
## 
##         Estimate  Std.Err Z value  Pr(>|z|)    
## RRecSnd 5.065585 0.067967  74.530 < 2.2e-16 ***
## PSAB-BA 2.953172 0.038625  76.458 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Null deviance: 45301.47 on 3882 degrees of freedom
## Residual deviance: 14706.31 on 3880 degrees of freedom
##  Chi-square: 30595.16 on 2 degrees of freedom, asymptotic p-value 0 
## AIC: 14710.31 AICC: 14710.32 BIC: 14722.84
Apollo_mission2$BIC- Apollo_mission1$BIC
## [1] -6135.723
Apollo_mission3<-rem.dyad(PartOfApollo_13,n=19,effects=c("PSAB-BA","RRecSnd","ITPSnd"), ordinal=TRUE,hessian= TRUE)
## Prepping edgelist.
## Checking/prepping covariates.
## Computing preliminary statistics
## Fitting model
## Obtaining goodness-of-fit statistics
summary(Apollo_mission3)
## Relational Event Model (Ordinal Likelihood)
## 
##            Estimate     Std.Err Z value Pr(>|z|)    
## RRecSnd  5.07271379  0.06838341 74.1805   <2e-16 ***
## ITPSnd  -0.00038064  0.00043885 -0.8673   0.3858    
## PSAB-BA  2.95169945  0.03863341 76.4028   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Null deviance: 45301.47 on 3882 degrees of freedom
## Residual deviance: 14705.64 on 3879 degrees of freedom
##  Chi-square: 30595.83 on 3 degrees of freedom, asymptotic p-value 0 
## AIC: 14711.64 AICC: 14711.65 BIC: 14730.43
Apollo_mission3$BIC- Apollo_mission2$BIC
## [1] 7.592304

4. Twitter data & visualization, centrality measures, and community detection.

The data was extracted from the Academic Twitter API in two steps. In the first step, all users being mentioned using the hashtags ‘#ic2s2’ or ‘#’netsciXXXX’ (XXXX = 2010, 2011, … 2022) were extracted. In the second step, all mentions of those users were extracted, up to 800 tweets. Finally, the core of the network was extracted by keeping users with an in-degree and out-degree over K=150 mentions. As there were some overlaps in the time, we modified the data in order to be usable for the REM model. The source or sender is a person tweeting and the target or receiver is the person mentioned. Note that the time was date of tweet, which was converted to day. In this REH data the number of actors is 39.

Question 10: Convert the Twitter data eventlist into a sociomatrix.
Hint: as.sociomatrix.eventlist(data, number of actors).

twitter_NET <- as.sociomatrix.eventlist(Twitter_data_rem3 ,39)

Question 11: Use graph_from_adjacency_matrix() to convert the Twitter adjacency matrix into a graph object.
Hint: graph_from_adjacency_matrix(sociomatrix).

# transforming Twitter adjacency matrix into igraph object
twitter_graph <- graph_from_adjacency_matrix(twitter_NET)

Question 12: Assign corresponding numbers as vertex names.
Hint: You can set vertex attributes by using V(graph)$name_of_attribute.

# adding vertex attributes to the igraph object (useful for calculating centrality measures)
V(twitter_graph)$name <- seq(1,39)

Question 13: Using igraph package visualize the Twitter data using different layouts to see which layout is the most beneficial. First, use a circular layout.
Hint: plot(graph, layout=layout_in_circle). Check the hint of Q4 to see how you can add weights to the edges.

# Circular layout
plot(twitter_graph, layout=layout_in_circle)
Circular Layout of the Twitter Network

Circular Layout of the Twitter Network

Now, try a Fruchterman and Reingold method.
Hint: plot(graph, layout=layout_with_fr).

# Fruchterman and Reingold method 
plot(twitter_graph, layout=layout_with_fr)
Fruchterman and Reingold Layout of the Twitter Network

Fruchterman and Reingold Layout of the Twitter Network

Try also the Kamada and Kawai method.
Hint: plot(graph, layout=layout_with_kk).

# Kamada and Kawai method
plot(twitter_graph, layout=layout_with_kk)
Kamada and Kawai Layout of the Twitter Network

Kamada and Kawai Layout of the Twitter Network

# Due to Twitter data having a large number of edges we can add weights to represent the number of occurrences of an edge between the same pair of nodes to better distinguish the structure of the graph.  

E(twitter_graph)$weight <- 1
twitter_g2 <- simplify(twitter_graph)

plot(twitter_g2, 
     edge.width=E(twitter_g2)$weight*0.05,
     layout=layout_with_kk)
Kamada and Kawai Layout of the Weighted Twitter Network

Kamada and Kawai Layout of the Weighted Twitter Network

Question 14: What are the size and order of this network?
Hint: gsize(graph), gorder(graph).

gsize(twitter_graph)
## [1] 1406
gorder(twitter_graph)
## [1] 39

Question 15: Create degree distributions of the Twitter data.
Hint: degree(graph, mode=c("in", "out", "total")); ggplot(data, aes()) + geom_histogram() + labs(title="", x="", y="").

# Since we are working with directed graph we can create indegree and outdegree distributions:

# calculating degrees
G.indegrees <- igraph::degree(twitter_graph, mode = 'in')
G.outdegrees <- igraph::degree(twitter_graph, mode = 'out')
G.total_degrees <- igraph::degree(twitter_graph, mode = 'total')

# creating a dataframe to be able to work with ggplot
G.degree.histogram <- data.frame(G.indegrees, G.outdegrees, G.total_degrees)

# ploting indegree distribution
ggplot(G.degree.histogram, aes(x = G.indegrees)) +
  geom_histogram(binwidth = 3,
                   colour= "black", fill = "white") +
  labs(title="Indegree Distribution", x="Vertex Degree", y="Frequency") 

# ploting outdegree distribution
ggplot(G.degree.histogram, aes(x = G.outdegrees)) +
  geom_histogram(binwidth = 3,
                   colour= "black", fill = "white") +
  labs(title="Outdegree Distribution", x="Vertex Degree", y="Frequency")

# ploting total degree distribution
ggplot(G.degree.histogram, aes(x = G.total_degrees)) +
  geom_histogram(binwidth = 3,
                   colour= "black", fill = "white") +
  labs(title="Total Degree Distribution", x="Vertex Degree", y="Frequency")

#hist(degree(twitter_graph, mode = 'in'), col='lightblue', xlab='Vertex Degree',
     #ylab='Frequency', main='')
#hist(degree(twitter_graph, mode = 'out'), col='blue', xlab='Vertex Degree', 
     #ylab='Frequency', main='')

According to the degree distributions, the Twitter network is well connected, with a few nodes being better connected than the rest, and with one node in particular that has nearly 250 total connections.

Question 16: Calculate centrality measures: degree, betweeness, closeness and eigenvalue. Extract nodes that score the highest on each centrality measure.
Hint: degree(graph, ..., mode=c("all", "out", "in", "total")); betweenness(graph, directed = TRUE, ...); closeness(graph, ...); eigen_centrality(graph, directed = TRUE, ...).

a) highest degree

in_degree_twitter <- igraph::degree(twitter_graph, mode='in')
out_degree_twitter <- igraph::degree(twitter_graph, mode='out')
total_degree_twitter <- igraph::degree(twitter_graph, mode='total')

in_degree_twitter[in_degree_twitter == max(in_degree_twitter)]
##   4 
## 186
out_degree_twitter[out_degree_twitter == max(out_degree_twitter)]
##  16 
## 119
total_degree_twitter[total_degree_twitter == max(total_degree_twitter)]
##   4 
## 237

Node 4 has the highest in- and total degree, while node 16 has the highest outdegree.

b) highest betweenness

between_twitter <- betweenness(twitter_graph, directed = TRUE, normalized = TRUE)
between_twitter[between_twitter == max(between_twitter)]
##         20 
## 0.06441726

Node 20 occurs most often on the shortest path between pairs of other nodes.

c) highest closeness

closeness_twitter <- closeness(twitter_graph, normalized = TRUE)
closeness_twitter[closeness_twitter == max(closeness_twitter, na.rm = TRUE) & !is.na(closeness_twitter)]
##        16 
## 0.8085106

Node 16 is located closest to all other nodes.

d) highest eigenvector

eigen_twitter <- eigen_centrality(twitter_graph, directed = TRUE)
## Warning in eigenvector_centrality_impl(graph = graph, directed = directed, : At
## vendor/cigraph/src/centrality/eigenvector.c:304 : Weighted directed graph in
## eigenvector centrality.
eigen_values <- eigen_twitter[1]
eigen_values <- unlist(eigen_values)
eigen_values[eigen_values == max(eigen_values)]
## vector.4 
##        1

Node 4 has the most well-connected neighbors.

Question 17: Apply community detection algorithm on the Twitter data. First, use naive approach.
Hint: as.undirected(graph, mode = c("collapse", "each", "mutual"); cluster_fast_greedy(graph).

# Converting digraph into undirected graph without multyedges:
undirected.twitter <- as.undirected(
  twitter_graph,
  mode = c("collapse"))

# use fast and greedy algorithm for hierarchical clustering 
tuc <- cluster_fast_greedy(undirected.twitter)

Use length() to check how many communities the algorithm has identified.

length(tuc)
## [1] 3

Use sizes() to see how large those communities are.

sizes(tuc)
## Community sizes
##  1  2  3 
## 12 12 15

Plot the results of the community detection using plot(communities, graph).

# plot clusters
plot(tuc, undirected.twitter)

After visually exploring the results of the community detection we can see that the connections seem relatively uniform and there are a lot of edges between the members of different communities. These results are expected given the degree distribution of this network.

Create a dendrogram using dendPlot(communities, mode=‘phylo’).

# plot a dendrogram
dendPlot(tuc, mode='phylo')

According to the dendrogram, all nodes are quite similar to other nodes in their respective communities. In addition, it appears that the heights of the branches pointing to different clusters are also similar suggesting that there isn’t a very clear distinction between the clusters.

5. Project 2: Application of REM on Twitter data

Question 18: First, look at the data, then plot the network. Can you see any trends in the network? Start with fitting a REM model and add the statistics of interest sequentially. Can you see any improvement based on the BIC?

Hint: Consider some of these statistics:“PSAB-BA”, “ISPSnd”, “PSAB-BY”, “PSAB-XB”, “NIDSnd”, “NIDRec”, and “NODSnd”.

“PSAB-BA” is a turn receiving effect - a receiver of a communication event immediately returns the communication event back \(AB \rightarrow BA\).

“ISPSnd” - an effect where nodes that have a higher number of shared nodes that they have received communication events from increases the likelihood of these nodes communicating in the future.

“PSAB-BY” is a turn receiving effect when a participant receiving a communication event is the one initiating the next communication effect \(AB \rightarrow BY\).

“PSAB-XB” is a turn usurping effect where a communication event from A to B is immediately followed by a communication event from X to B \(AB \rightarrow XB\).

“NIDSnd” - normalized indegree of a node affects it’s future rate of initiating communication events.

“NIDRec” - normalized indegree of a node affects it’s future rate of receiving communication events.

“NODSnd” - normalized outdegree of a node affects it’s future rate of initiating communication events.

head(Twitter_data_rem3)
##   time_day source target
## 1     0.00      8      1
## 2   121.03     28      2
## 3   199.08     28      2
## 4   266.95      4      3
## 5   573.59     22      5
## 6   574.49     25      5
twitter_NET <- as.sociomatrix.eventlist(Twitter_data_rem3 ,39)

gplot(twitter_NET, edge.lwd=twitter_NET^0.75, arrowhead.cex=
          log(as.edgelist.sna(twitter_NET)[,3])+.25,
      vertex.col= "black",
      vertex.cex=1.25)  
Twitter Data Network

Twitter Data Network

twitter1<- rem.dyad(Twitter_data_rem3,n=39, effects = c("PSAB-BA", "PSAB-BY"), ordinal = FALSE,hessian = TRUE)
## Prepping edgelist.
## Checking/prepping covariates.
## Computing preliminary statistics
## Fitting model
## Obtaining goodness-of-fit statistics
summary(twitter1)
## Relational Event Model (Temporal Likelihood)
## 
##         Estimate  Std.Err Z value  Pr(>|z|)    
## PSAB-BA -6.76132  0.58568 -11.544 < 2.2e-16 ***
## PSAB-BY -8.28467  0.20444 -40.523 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Null deviance: 25024.13 on 1405 degrees of freedom
## Residual deviance: 7425381 on 1404 degrees of freedom
##  Chi-square: -7400357 on 1 degrees of freedom, asymptotic p-value 1 
## AIC: 7425385 AICC: 7425385 BIC: 7425396