get_started.Rmd

graphTweets 4.0 has been redesigned to work hand-in-hand with rtweet. Let’s start by getting some tweets. If you’re unsure how to get started, head over to the rtweet website, where everything is very well explained. We’ll get 500 tweets on #rstats, excluding re-tweets.
library(rtweet)

# 500 tweets on #rstats, excluding retweets
tweets <- search_tweets(
  "#rstats",
  n = 500,
  include_rts = FALSE
)

Now we can start using graphTweets.
First extract the edges with gt_edges, then build an igraph object using gt_graph or collect results with gt_collect. If you do not want to return an igraph object, use gt_collect; it will return a list of two data.frames: edges and nodes.
# Extract the edges only, then collect the result as a list of data.frames.
edges <- tweets %>%
  gt_edges(screen_name, mentions_screen_name) %>%
  gt_collect()

names(edges)
#> [1] "edges" "nodes"

(It also returns nodes but it’s empty since we only ran gt_edges).
So far we only used gt_edges to extract the edges, we can also extract the nodes.
# Extract the edges, derive the nodes from them, then collect both.
graph <- tweets %>%
  gt_edges(screen_name, mentions_screen_name) %>%
  gt_nodes() %>%
  gt_collect()

lapply(graph, nrow) # number of edges and nodes
#> $edges
#> [1] 248
#>
#> $nodes
#> [1] 309

lapply(graph, names) # names of data.frames returned
#> $edges
#> [1] "source" "target" "n"
#>
#> $nodes
#> [1] "nodes" "type" "n"

On graphTweets version 0.4.1 gt_nodes returns the number of edges the node is present in: n_edges. Here I used gt_collect, you can, again, use gt_graph if you want to return an igraph object.
Adding nodes has not brought much to the table so far; however, gt_nodes takes another argument, meta, which, if set to TRUE, will return meta-data on each node where available*. More information on passing meta-data to nodes can be found further down the document.
# Same pipeline as before, but with meta = TRUE passed to gt_nodes.
graph <- tweets %>%
  gt_edges(screen_name, mentions_screen_name) %>%
  gt_nodes(meta = TRUE) %>%
  gt_collect()
# lapply(graph, names) # names of data.frames returned

Note that you can also pass meta-data to edges if needed.
# created_at is passed to gt_edges as an additional column
# (presumably kept as edge meta-data — confirm against gt_edges docs).
graph <- tweets %>%
  gt_edges(screen_name, mentions_screen_name, created_at) %>%
  gt_nodes(meta = TRUE) %>%
  gt_collect()

Before we plot our graph, we’re going to modify some of the meta-data, a lot of NA are returned (where the meta-data was not available *).
Here I use sigmajs to plot the graph.
library(dplyr)
library(sigmajs) # for plots
#> Welcome to sigmajs
#>
#> Docs: sigmajs.john-coene.com

# Collect edges and nodes as plain data.frames.
gt <- tweets %>%
  gt_edges(screen_name, mentions_screen_name) %>%
  gt_nodes() %>%
  gt_collect()

# Add the columns handed to sg_nodes() below: id, label, size and color.
nodes <- gt$nodes %>%
  mutate(
    id = nodes,
    label = nodes,
    size = n,
    color = "#1967be"
  )

# seq_len() instead of 1:n(): with zero edges, 1:n() evaluates to c(1, 0),
# which cannot be assigned to a zero-row data.frame.
edges <- gt$edges %>%
  mutate(
    id = seq_len(n())
  )

sigmajs() %>%
  sg_force_start() %>%
  sg_nodes(nodes, id, label, size, color) %>%
  sg_edges(edges, id, source, target) %>%
  sg_force_stop(10000)

Let’s look at communities, we’ll return an igraph object with gt_graph so we can easily run a community finding algorithm from the igraph package.
You can also build networks of retweets.
# NOTE(review): `token` is not defined in the visible part of this document;
# presumably it is created beforehand — confirm.
tweets <- search_tweets(
  "#rstats filter:retweets",
  n = 500,
  include_rts = TRUE,
  token = token,
  lang = "en"
)
#> Searching for tweets...
#> Finished collecting tweets!

net <- tweets %>%
  gt_edges(screen_name, retweet_screen_name) %>%
  gt_nodes() %>%
  gt_collect()

# gt_collect() returns a list named "edges" and "nodes"; plain extraction
# avoids depending on the zeallot unpacking operator, which is never loaded
# in the visible code.
edges <- net$edges
nodes <- net$nodes

# seq_len() stays valid (empty) when there are no edges, unlike 1:nrow().
edges$id <- seq_len(nrow(edges))
edges$size <- edges$n

nodes$id <- nodes$nodes
nodes$label <- nodes$nodes
nodes$size <- nodes$n

sigmajs() %>%
  sg_nodes(nodes, id, size, label) %>%
  sg_edges(edges, id, source, target) %>%
  sg_layout() %>%
  sg_cluster(colors = c("#0C46A0FF", "#41A5F4FF")) %>%
  sg_settings(
    edgeColor = "default",
    defaultEdgeColor = "#d3d3d3"
  ) %>%
  sg_neighbours()

We can bind quoted tweets (surely they should be considered as retweets) using gt_edges_bind.
# Retweet edges, with quoted-tweet edges bound on via gt_edges_bind().
net <- tweets %>%
  gt_edges(screen_name, retweet_screen_name) %>%
  gt_edges_bind(screen_name, quoted_screen_name) %>%
  gt_nodes() %>%
  gt_collect()

# gt_collect() returns a list named "edges" and "nodes"; plain extraction
# avoids depending on the zeallot unpacking operator, which is never loaded
# in the visible code.
edges <- net$edges
nodes <- net$nodes

# seq_len() stays valid (empty) when there are no edges, unlike 1:nrow().
edges$id <- seq_len(nrow(edges))
edges$size <- edges$n

nodes$id <- nodes$nodes
nodes$label <- nodes$nodes
nodes$size <- nodes$n

sigmajs() %>%
  sg_nodes(nodes, id, size, label) %>%
  sg_edges(edges, id, source, target) %>%
  sg_layout() %>%
  sg_cluster(colors = c("#0C46A0FF", "#41A5F4FF")) %>%
  sg_settings(
    edgeColor = "default",
    defaultEdgeColor = "#d3d3d3"
  ) %>%
  sg_neighbours()

## Meta data
You can pass meta data to the edges and subsequently to the nodes using gt_add_meta.
# Carry both follower counts along on the edges, then hand them to the
# nodes as a single `size` column via gt_add_meta().
gt <- tweets %>%
  gt_edges(
    screen_name,
    retweet_screen_name,
    followers_count,
    retweet_followers_count
  ) %>%
  gt_nodes() %>%
  gt_add_meta(
    name = size,
    source = followers_count,
    target = retweet_followers_count
  )

# size is now number of followers
head(gt$nodes)
#> # A tibble: 6 x 4
#> nodes type n size
#> <chr> <chr> <int> <int>
#> 1 _100daysofcode user 1 35137
#> 2 _abichat user 1 145
#> 3 _ajohnstone user 1 454
#> 4 _colinfay user 6 6458
#> 5 _reactdev user 3 2629
#> 6 _willdebras user 1 598

# seq_len() stays valid (empty) when there are no edges, unlike 1:nrow().
gt$edges$id <- seq_len(nrow(gt$edges))
gt$nodes$id <- gt$nodes$nodes
gt$nodes$label <- gt$nodes$nodes

# Map each node's size onto a two-colour gradient.
gt$nodes$color <- scales::col_numeric(
  c("#41A5F4FF", "#0C46A0FF"),
  NULL
)(gt$nodes$size)

sigmajs() %>%
  sg_nodes(gt$nodes, id, size, label, color) %>%
  sg_edges(gt$edges, id, source, target) %>%
  sg_layout() %>%
  sg_settings(
    edgeColor = "default",
    defaultEdgeColor = "#d3d3d3"
  ) %>%
  sg_neighbours()

## Preprocess edges
You may also pre-process edges before computing the nodes.
# Edge pre-processing callback: collapses all rows sharing the same
# source/target pair into a single row.
#
# df: edges data.frame; the code reads its `source`, `target`, `n` and
#     `text` columns.
# Returns the ungrouped summary with columns source, target, n and nchar.
prep <- function(df){
  by_pair <- group_by(df, source, target)
  collapsed <- summarise(
    by_pair,
    n = sum(n), # number of tweets
    # NOTE(review): `n` below is presumably the total computed on the line
    # above, since dplyr evaluates summarise() expressions in order — confirm.
    nchar = sum(nchar(text)) / n # characters per tweet
  )
  dplyr::ungroup(collapsed)
}
# Keep the tweet text on the edges so prep() can compute characters per
# tweet before the nodes are derived.
gt <- tweets %>%
  gt_edges(screen_name, retweet_screen_name, text) %>%
  gt_preproc_edges(prep) %>%
  gt_nodes()

# seq_len() stays valid (empty) when there are no edges, unlike 1:nrow().
gt$edges$id <- seq_len(nrow(gt$edges))
gt$nodes$id <- gt$nodes$nodes
gt$nodes$label <- gt$nodes$nodes
gt$nodes$size <- gt$nodes$n

# Colour each edge by its nchar value on a blue-to-red gradient.
gt$edges$color <- scales::col_numeric(
  c("blue", "red"),
  NULL
)(gt$edges$nchar)

sigmajs() %>%
  sg_nodes(gt$nodes, id, size, label) %>%
  sg_edges(gt$edges, id, source, target, color) %>%
  sg_layout()

* Some nodes are mentioned in tweets only and therefore have no meta-data associated.