宝可梦: 数据可视化作业
数据准备:宝可梦属性必知必会
- Type(属性):共十多种;从第五世代开始,宝可梦可以同时拥有两个属性
- Total(能力值):也叫种族值,表示该种宝可梦各项能力的数值总和
- HP(体力值)
- Attack(物理攻击力)
- Defense(物理防御力)
- SpAtk, Special Attack(特殊攻击力)
- SpDef, Special Defense(特殊防御力)
- Speed(速度)
- Generation(世代):第一代即为大家熟知的皮卡丘、杰尼龟等,本次作业的数据集为前六个世代的宝可梦
- Legendary(传说):神兽。通常被主角小智撞见,或者剧场版登场的稀有宝可梦
数据简介
# Setup ----
# Loads packages, reads and tidies the data, builds the derived tables, the
# logo grob and the named colour palettes used by the plots below.
# NOTE(review): `img_bw`, `type_key`, `starter` and `starter_long` are used by
# later plots but are not defined in the visible part of this file.

# Packages
library(tidyverse)
# png + grid: read the logo file and wrap it as a grob for annotation_custom()
library(png)
library(grid)

# Read the data and print a quick overview of its columns
pokemon <- read.csv("Pokemon.csv")
glimpse(pokemon)

# Tidy column names: first column becomes "ID", then every literal dot is
# stripped from all names
colnames(pokemon)[1] <- "ID"
colnames(pokemon) <- str_replace_all(names(pokemon), "\\.", "")

# Long format: columns 6 to 11 are stacked into Key / Value pairs
pkm_long <- pokemon %>%
  pivot_longer(6:11, names_to = "Key", values_to = "Value")

# Non-legendary subset. as.logical() accepts both a logical column and the
# strings "True"/"False", so the filter works whichever way read.csv() parsed
# the column.
pkm_nonLegendary <- pokemon %>%
  filter(!as.logical(Legendary))

# Logo image as a raster grob
img <- readPNG("pokemon.png")
img <- rasterGrob(img, interpolate = TRUE)

# Colour palettes
Gen <- c("#DA4511", "#FFBD00", "#6A953F", "#9A6233", "#D3AE7C", "#307CA1")
Type <- c(
  "#a8b820", "#705848", "#7860e0", "#f8c030", "#FFAAFF", "#a05038",
  "#f05030", "#98a8f0", "#6060b0", "#78c850", "#d0b058", "#58c8e0",
  "#a8a090", "#b058a0", "#f870a0", "#b8a058", "#a8a8c0", "#3898f8"
)
Stat <- c("#f5ac78", "#fae078", "#ff5959", "#9cb4f3", "#a3da8a", "#fa92b2")

# Name the palettes only after both the palettes and pkm_long exist.
# factor() is required for Stat: Key is a character column, and levels() of a
# character vector is NULL, which would leave the palette unnamed.
names(Gen) <- as.character(1:6)
names(Type) <- levels(factor(pokemon$Type1))
names(Stat) <- levels(factor(pkm_long$Key))
可视化
属性
# Number of pokemons per primary type, as horizontal bars labelled with the
# count and ordered by count.
pokemon %>%
  count(Type1, name = "count") %>%
  ggplot(aes(fct_reorder(Type1, count, .desc = TRUE), count, fill = Type1)) +
  # The outline colour must be set on the bar layer; passed to ggplot() it is
  # silently ignored.
  geom_col(color = "black", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 2.5) +
  coord_flip() +
  scale_fill_manual(values = Type) +
  labs(
    x = NULL, y = NULL,
    title = "Num of Pokemons by Type",
    subtitle = "All pokemons have a primary type. But not all pokemons have a secondary type."
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold")
  ) +
  # Logo placed in data coordinates (x = type position, y = count)
  annotation_custom(img, xmin = 10, xmax = 20, ymin = 60, ymax = 112)
图1. 原生属性的宝可梦数量
第五世代引入双属性后,飞行系(Flying)的宝可梦通常都同时具备"普通"(Normal)属性,仅剩的原生的飞行系宝可梦寥寥无几。
相反,原生的水系(Water)的宝可梦数量最多。
# Number of dual-type pokemons for every (primary, secondary) type pair.
# Rows with an empty secondary type are dropped first. count() returns an
# ungrouped table, so no grouping leaks into later uses of dual_type.
dual_type <- pokemon %>%
  filter(Type2 != "") %>%
  count(Type1, Type2, name = "count")

# Heat map of the pair counts, with the count printed in each tile
dual_type %>%
  ggplot(aes(Type1, Type2)) +
  annotation_custom(img_bw) +
  geom_raster(aes(fill = count), alpha = .9) +
  geom_text(aes(label = count)) +
  scale_fill_gradient(high = "#3f5da1", low = "#fbcb04") +
  labs(
    x = "Primary Type", y = "Secondary Type",
    title = "Type Combos",
    subtitle = "Number of dual type pokemons"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold")
  )
图2. 双属性的宝可梦数量
可以看到,同时具备飞行(Flying)和普通(Normal)属性的宝可梦数量多达 24。
能力值
# Distribution of the stat values in pkm_long: one histogram panel per Key.
pkm_long %>%
  ggplot(aes(Value)) +
  annotation_custom(img_bw) +
  # bins = 30 is the value geom_histogram() falls back to; stating it keeps the
  # picture unchanged and silences the "pick better value" message.
  geom_histogram(
    aes(fill = Key),
    bins = 30,
    color = "black",
    alpha = .9,
    show.legend = FALSE
  ) +
  facet_wrap(~Key, ncol = 3, nrow = 2) +
  scale_fill_manual(values = Stat) +
  labs(
    x = NULL, y = NULL,
    title = "Distribution of Stat - All Pokemons"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold"),
    strip.text = element_text(color = "#3f5da1", face = "bold"),
    strip.background = element_rect(
      color = "#3f5da1", fill = "#fbcb04", linetype = "solid"
    )
  )
图3. 宝可梦六项能力值的分布
所有宝可梦的能力值显示,整个宝可梦世界的强弱分布相对合理,各项能力的杰出者都只占很少的部分。
# Attack vs Defense for all pokemons, with a linear fit and its equation.
# The equation label is computed from the same linear model geom_smooth()
# draws, instead of being hard-coded, so it cannot drift from the data.
atk_def_fit <- lm(Defense ~ Attack, data = pokemon)
# A plotmath string is the documented label form for parse = TRUE.
atk_def_label <- sprintf(
  "italic(y) == %.2f * italic(x) + %.2f",
  coef(atk_def_fit)[["Attack"]],
  coef(atk_def_fit)[["(Intercept)"]]
)

pokemon %>%
  ggplot(aes(Attack, Defense)) +
  geom_point(aes(color = as.factor(Generation), shape = Legendary)) +
  geom_smooth(method = "lm") +
  scale_color_manual(name = "Generation", values = Gen) +
  annotate("text", x = 150, y = 200, label = atk_def_label, parse = TRUE) +
  labs(
    title = "The More Aggressive, the More Defensive",
    subtitle = "Linear Relationship between Attack and Defense"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  ) +
  # Logo placed just above the equation, in data coordinates
  annotation_custom(img, xmin = 125, xmax = 175, ymin = 210, ymax = 235)
图4. 天赋异禀,攻防兼备
物理攻击力越高的宝可梦,物理防御力通常也比较强,呈现出显著的正相关。这可能是因为宝可梦进化后,通常各项能力均有成长。
# Attack vs Defense again, split into one panel per value of Legendary, each
# with its own linear fit and free axis scales.
ggplot(pokemon, aes(x = Attack, y = Defense)) +
  geom_point(aes(color = factor(Generation), shape = Legendary)) +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(Legendary), scales = "free") +
  scale_color_manual(values = Gen, name = "Generation") +
  labs(
    subtitle = "Association in a Population Reverses When the Population is Divided into Subpopulations",
    title = "Simpson's Paradox:"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(color = "#3f5da1", fill = "#fbcb04"),
    strip.text = element_text(color = "#3f5da1", face = "bold")
  )
图5. 神兽的攻守能力更加平衡
如果只看传说宝可梦的物理攻击力和防御力,原本正相关的关系则不复存在。神兽的攻守能力可能更加均衡。
属性 x 能力值
# Mean Total per primary type as a lollipop chart, ordered by the mean.
# summarise() already drops the single grouping variable, so no ungroup()
# is needed.
pokemon %>%
  group_by(Type1) %>%
  summarise(type_mean = mean(Total)) %>%
  ggplot(aes(fct_reorder(Type1, type_mean), type_mean)) +
  annotation_custom(img_bw) +
  geom_point(
    aes(color = Type1, size = type_mean),
    alpha = .9,
    show.legend = FALSE
  ) +
  # Dashed stem from the lower axis limit (375) up to each type's mean
  geom_segment(
    aes(xend = Type1, y = 375, yend = type_mean, color = Type1),
    linetype = "dashed",
    alpha = .5,
    show.legend = FALSE
  ) +
  coord_flip() +
  scale_color_manual(values = Type) +
  ylim(375, 575) +
  labs(
    x = NULL, y = NULL,
    title = "Dragon-Type Has the Highest Mean Total Stats"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )
图6. 能力强者,龙系宝可梦
从能力值上看,龙系(Dragon)可能是宝可梦世界最强的属性。
# Density of Total for each primary type, one panel per type.
pokemon %>%
  ggplot(aes(Total)) +
  geom_density(aes(fill = Type1), alpha = .5, show.legend = FALSE) +
  facet_wrap(~Type1, nrow = 3) +
  scale_fill_manual(values = Type) +
  labs(
    x = "Total Stats", y = "Density",
    title = "Distribution of Mean Total Stats by Primary Types"
  ) +
  # No trailing comma after the last theme element: it would pass an empty
  # argument to theme().
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )
图7. 不同属性宝可梦能力值分布
多个属性的宝可梦在能力值上表现为双峰分布,从该图也能明显看出龙系宝可梦数值上偏强的设计。
# For every primary type, bars of (mean_value - 72.5) per stat, filled by the
# `above` flag ("Yes" / "No").
# NOTE(review): 72.5 is presumably the overall mean stat value across all
# pokemons; `type_key` is built outside the visible source. Confirm both.
ggplot(type_key, aes(x = Key, y = mean_value - 72.5)) +
  geom_col(aes(fill = above), color = "black") +
  facet_wrap(vars(Type1), ncol = 6) +
  coord_flip() +
  scale_fill_manual(
    values = c("Yes" = "red3", "No" = "whitesmoke"),
    name = "Above Mean Total Stats"
  ) +
  labs(
    title = "Characteristics of Pokemon's Stats by Each Primary Type",
    x = NULL,
    y = NULL
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(color = "black", fill = "red3"),
    strip.text = element_text(color = "whitesmoke", face = "bold"),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
图8. 各个属性宝可梦的能力值水平
所有属性的宝可梦在各项能力上,超过/低于全部宝可梦整体平均水平的数量。可以看出龙系(Dragon)宝可梦具有相对较高的物理攻击力,钢系(Steel)宝可梦具有相对较高的物理防御力。
御三家
# Starter stats drawn as dodged bars in polar coordinates, one panel per
# generation, filled by primary type.
# NOTE(review): `starter` is built outside the visible source; it is assumed
# to hold Key / Value / Type1 / Generation columns. Confirm.
starter %>%
  ggplot(aes(Key, Value, fill = Type1)) +
  geom_col(
    color = "black",
    position = "dodge",
    show.legend = FALSE
  ) +
  coord_polar() +
  facet_wrap(~Generation) +
  scale_fill_manual(values = Type) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starter's Stats by Generation - with Evolution"
  ) +
  theme(
    panel.background = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 75, hjust = 0)
  )
图9. 六世代御三家的能力值雷达
# Starter stats per generation: one line per pokemon (grouped by Name), with
# point size mapped to Evolution and colour to primary type.
# NOTE(review): `starter` is built outside the visible source. Confirm its
# Name / Evolution columns.
starter %>%
  ggplot(aes(Key, Value)) +
  annotation_custom(img, xmin = 5.5, xmax = 6.5, ymin = 20, ymax = 50) +
  geom_line(
    aes(group = Name, color = Type1),
    alpha = .5,
    show.legend = FALSE
  ) +
  geom_point(
    aes(size = Evolution, color = Type1),
    alpha = .8,
    show.legend = FALSE
  ) +
  facet_wrap(~Generation) +
  scale_color_manual(values = Type) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starter's Stats by Generation - with Evolution"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )
图10. 六世代御三家的能力值水平均值
# Starters' stats against all non-legendary Grass / Fire / Water pokemons:
# jittered points for the whole type, with the starters overlaid and coloured
# by generation.
# NOTE(review): `starter_long` is built outside the visible source. Confirm
# its Key / Value / Generation / Type1 columns.
pkm_nonLegendary %>%
  pivot_longer(6:11, names_to = "Key", values_to = "Value") %>%
  filter(Type1 %in% c("Grass", "Fire", "Water")) %>%
  ggplot(aes(Key, Value)) +
  annotation_custom(img_bw) +
  geom_jitter(aes(color = Key), alpha = .8, show.legend = FALSE) +
  geom_point(
    data = starter_long,
    aes(Key, Value, color = Generation),
    size = 2,
    shape = 20,
    alpha = .3,
    show.legend = FALSE
  ) +
  coord_flip() +
  facet_wrap(~Type1) +
  # One colour scale serves both layers, so the stat and generation palettes
  # are concatenated.
  scale_color_manual(values = c(Stat, Gen)) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starters' Stats in All Pokemon of its Type"
  ) +
  # No trailing comma after the last theme element: it would pass an empty
  # argument to theme().
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )
图11. 六世代御三家的能力值在同属性宝可梦中的表现
# Violin variant of the starters-vs-type comparison: one violin per stat for
# all non-legendary Grass / Fire / Water pokemons, with the starters overlaid
# as points coloured by generation.
# NOTE(review): `starter_long` is built outside the visible source. Confirm
# its Key / Value / Generation / Type1 columns.
pkm_nonLegendary %>%
  pivot_longer(6:11, names_to = "Key", values_to = "Value") %>%
  filter(Type1 %in% c("Grass", "Fire", "Water")) %>%
  ggplot(aes(Key, Value)) +
  annotation_custom(img_bw) +
  geom_violin(
    aes(fill = Key, color = Key),
    alpha = .8,
    show.legend = FALSE
  ) +
  geom_point(
    data = starter_long,
    aes(Key, Value, color = Generation),
    size = 2,
    shape = 20,
    alpha = .2,
    show.legend = FALSE
  ) +
  coord_flip() +
  facet_wrap(~Type1) +
  # The colour scale serves both layers, so the stat and generation palettes
  # are concatenated; the fill scale is given the same values.
  scale_color_manual(values = c(Stat, Gen)) +
  scale_fill_manual(values = c(Stat, Gen)) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starters' Stats in All Pokemon of its Type"
  ) +
  # No trailing comma after the last theme element: it would pass an empty
  # argument to theme().
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )