加载R包
代码语言:r
# Install the two packages the original script installed, but only when they
# are missing, so re-running the script does not reinstall them every time.
for (pkg in c("mapdata", "stopwords")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Attach everything the script uses (tidyverse was previously loaded twice).
library(tidyverse)
library(mapdata)
library(ggtext)
library(stopwords)
library(tidytext)
library(ggrepel)
导入数据
代码语言:r
# Read the two input tables from CSV files in the current working directory.
historical_markers <- read_csv("historical_markers.csv")
no_markers <- read_csv("no_markers.csv")
数据清洗
代码语言:r
# Collapse all marker titles into one text blob per state/province.
combined <- historical_markers %>%
  # Drop Alaska, Hawaii and Puerto Rico
  filter(!(state_or_prov %in% c("Alaska", "Hawaii", "Puerto Rico"))) %>%
  group_by(state_or_prov) %>%
  # Concatenate every title in the group into a single string
  summarize(text = paste(title, collapse = " "))

# Strip all punctuation characters from the combined titles.
combined$words <- str_replace_all(combined$text, "[[:punct:]]", "")

# NOTE(review): `word_by_state` is joined to the state label positions further
# down but is not defined anywhere in the visible script. The block below is a
# reconstruction: the single most frequent non-stopword title word per state,
# keyed by lower-case state name so it can be matched against map_data()'s
# `region` column. Confirm against the author's intended definition.
word_by_state <- combined %>%
  select(state_or_prov, words) %>%
  mutate(state_or_prov = str_to_lower(state_or_prov)) %>%
  unnest_tokens(word, words) %>%
  filter(!(word %in% stopwords("en"))) %>%
  count(state_or_prov, word) %>%
  group_by(state_or_prov) %>%
  slice_max(n, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(state_or_prov, word)
加载地图数据
代码语言:r
# Load the polygon outlines of the US states.
state_info <- map_data("state")

# Compute one label position per state (the midpoint of the state's
# longitude/latitude bounding box), apply a few manual offsets, and attach
# the per-state word to each position.
state_labels <- state_info %>%
  group_by(region) %>%
  summarise(
    min_long = min(long), max_long = max(long),
    min_lat = min(lat), max_lat = max(lat),
    range_long = max_long - min_long,
    range_lat = max_lat - min_lat,
    long = min_long + range_long / 2,
    lat = min_lat + range_lat / 2
  ) %>%
  # Manual longitude offsets for selected states
  mutate(long = case_when(
    region %in% c("michigan", "florida") ~ long + 2,
    region == "idaho" ~ long - 1,
    region == "virginia" ~ long + 1,
    TRUE ~ long
  )) %>%
  # Manual latitude offset for Maryland
  mutate(lat = case_when(
    region == "maryland" ~ lat + 0.5,
    TRUE ~ lat
  )) %>%
  select(region, long, lat) %>%
  # Keep every row of word_by_state; `region` values are matched against its
  # `state_or_prov` column.
  right_join(word_by_state, by = c("region" = "state_or_prov"))
数据可视化
代码语言:r
# States whose labels are drawn with geom_text_repel() (nudged, with a
# connecting segment) instead of being placed directly on the map.
repelled_states <- c(
  "massachusetts", "connecticut", "new jersey", "delaware",
  "maryland", "district of columbia", "new hampshire"
)

# Filled 2D density of marker locations over the state outlines, with one
# word label per state.
historical_markers %>%
  filter(!(state_or_prov %in% c("Alaska", "Hawaii", "Puerto Rico"))) %>%
  ggplot() +
  # State outlines
  geom_polygon(
    aes(x = long, y = lat, group = group),
    data = state_info, fill = NA, color = "black", linewidth = 0.15
  ) +
  coord_fixed(ratio = 1.3) +
  # Density of marker coordinates, cut into 7 filled bands
  geom_density2d_filled(
    aes(x = longitude_minus_w, y = latitude_minus_s),
    show.legend = FALSE, alpha = 0.4, bins = 7
  ) +
  # One fill colour per density band
  scale_fill_manual(
    values = c(
      "white", "#CEE9e9", "#84BBD8", "#F8F2BE",
      "#FEC376", "#F88A51", "#A50026"
    )
  ) +
  # Labels placed directly at the computed state position
  geom_text(
    data = state_labels %>% filter(!(region %in% repelled_states)),
    aes(x = long, y = lat, label = word),
    size = 2, inherit.aes = FALSE
  ) +
  # Labels for the listed states, nudged away from their position.
  # NOTE(review): the nudge vectors have 7 entries, which presumably assumes
  # exactly one label row per listed state — confirm.
  geom_text_repel(
    data = state_labels %>% filter(region %in% repelled_states),
    aes(x = long, y = lat, label = word),
    nudge_x = c(5, 3, 5, 4, 5, 4, 4),
    nudge_y = c(0, 0, -3, 0, 0, 0, 0),
    size = 2, min.segment.length = 0.2
  ) +
  theme_classic() +
  # Remove axes and backgrounds
  theme(
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    plot.background = element_blank(),
    panel.background = element_blank()
  )