R语言推特twitter转发可视化分析

2020-08-20 15:13:30 浏览数 (1)

原文链接:http://tecdat.cn/?p=5124

包含术语“生物信息学”的推文示例

第1步:加载所需的软件包

代码语言:R

# 加载包

library(twitteR)


library(igraph)


library(stringr)

第2步:收集关于“生物信息学”的推文

代码语言:R

# Search Twitter for up to 500 tweets containing "bioinformatics".
# NOTE(review): presumably requires twitteR OAuth credentials to be set up
# beforehand — confirm.
dm_tweets <- searchTwitter("bioinformatics", n = 500)

# Extract the text of every tweet. vapply() guarantees a character vector,
# even when the search returns no tweets (sapply() would return a list).
dm_txt <- vapply(dm_tweets, function(x) x$getText(), character(1))

第3步:识别转发

代码语言:R


# Regular expression for retweet markers: "RT" or "via" followed by one or
# more @user mentions. Backslashes are doubled so that R can parse the string
# literal, and the `+` quantifiers allow multi-character user names.
rt_regex <- "(RT|via)((?:\\b\\W*@\\w+)+)"

# Show the text of the tweets that look like retweets
# (searched in the text vector, not in the list of status objects)
grep(rt_regex, dm_txt, ignore.case = TRUE, value = TRUE)

# Positions in dm_txt (and therefore in dm_tweets) of the retweets
rt_patterns <- grep(rt_regex, dm_txt, ignore.case = TRUE)

# Show the retweets selected by those positions
dm_txt[rt_patterns]

第4步:收集谁转发和谁发布

我们将使用这些结果构建边列表(edge list),用于创建图

代码语言:R

# Preallocate one list slot per retweet for the retweeting user and for the
# original poster(s)
who_retweet <- vector("list", length(rt_patterns))
who_post <- vector("list", length(rt_patterns))

# seq_along() makes the loop a no-op when no retweets were found
# (1:length(x) would iterate over c(1, 0) and fail on the first lookup)
for (i in seq_along(rt_patterns)) {
  # Status object of the i-th retweet
  twit <- dm_tweets[[rt_patterns[i]]]

  # Extract the "RT @user" / "via @user" fragments. Matching ignores case so
  # that it agrees with the case-insensitive grep() that selected the tweet.
  poster <- str_extract_all(
    twit$getText(),
    regex("(RT|via)((?:\\b\\W*@\\w+)+)", ignore_case = TRUE)
  )

  # Remove ':'
  poster <- gsub(":", "", unlist(poster))

  # Strip the "RT @" / "via @" prefix, leaving the original poster's name
  who_post[[i]] <- gsub("(RT @|via @)", "", poster, ignore.case = TRUE)

  # Retweeting user, repeated once per extracted poster so both lists align
  who_retweet[[i]] <- rep(twit$getScreenName(), length(poster))
}

# Flatten both lists into character vectors of equal length
who_post <- unlist(who_post)
who_retweet <- unlist(who_retweet)

第5步:从边列表创建图

代码语言:R


# Two-column edge list: one row per (retweeting user, original poster) pair
retweeter_poster <- cbind(who_retweet, who_post)

# Build the graph from the edge list. graph_from_edgelist() replaces the
# deprecated graph.edgelist().
rt_graph <- graph_from_edgelist(retweeter_poster)

# Vertex names, used later as plot labels. vertex_attr() replaces the
# deprecated get.vertex.attribute().
ver_labs <- vertex_attr(rt_graph, "name", index = V(rt_graph))

第6步:让我们绘制图

代码语言:R


# Vertex coordinates from the Fruchterman-Reingold layout.
# layout_with_fr() replaces the deprecated layout.fruchterman.reingold().
glay <- layout_with_fr(rt_graph)

# Dark background with narrow margins
par(bg = "gray15", mar = c(1, 1, 1, 1))

# Draw the retweet network: labels only (no vertex shapes), translucent edges
plot(rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = 10,
  vertex.label = ver_labs,
  vertex.label.family = "sans",
  vertex.shape = "none",
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = 0.85,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = 3,
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)

# Add the title. The leading "\n" (a line break) was lost in the original
# text, which left a literal "nTweets".
title("\nTweets with 'bioinformatics': Who retweets whom",
  cex.main = 1, col.main = "gray95"
)

第7步:让我们试着给它一个更生物信息学的外观

代码语言:R

# Second rendering of the same graph and layout: green translucent vertices
# and edges with monospaced labels
par(bg = "gray15", mar = c(1, 1, 1, 1))

plot(rt_graph,
  layout = glay,
  vertex.color = hsv(h = .35, s = 1, v = .7, alpha = 0.1),
  vertex.frame.color = hsv(h = .35, s = 1, v = .7, alpha = 0.1),
  vertex.size = 5,
  vertex.label = ver_labs,
  vertex.label.family = "mono",
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = 0.85,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = 3,
  edge.color = hsv(h = .35, s = 1, v = .7, alpha = 0.4)
)

# Add the title. The leading "\n" (a line break) was lost in the original
# text, which left a literal "nTweets".
title("\nTweets with 'bioinformatics': Who retweets whom",
  cex.main = 1, col.main = "gray95", family = "mono"
)

0 人点赞