宝可梦: 数据可视化作业

数据准备:宝可梦属性必知必会

  1. HP(体力值)
  2. Attack(物理攻击力)
  3. Defense(物理防御力)
  4. SpAtk, SpecialAttack(特殊攻击力)
  5. SpDef, SpecialDefense(特殊防御力)
  6. Speed(速度)

数据简介
# Setup ----
# Load the tidyverse (pipes, dplyr/tidyr verbs, stringr, forcats, ggplot2).
library(tidyverse)

# Packages for reading the logo PNG and wrapping it as a grid graphic.
library(png)
library(grid)

# Read the data and print a structural overview.
pokemon <- read.csv("Pokemon.csv")
glimpse(pokemon)

# Tidy the column names: call the first column "ID", then strip every dot
# (read.csv() replaces characters that are not valid in names with dots).
colnames(pokemon)[1] <- "ID"
colnames(pokemon) <- str_replace_all(names(pokemon), "\\.", "")

# Long format: columns 6 to 11 are stacked into Key / Value pairs.
# NOTE(review): positions 6:11 are presumably the six base stats
# (HP ... Speed) -- confirm against the CSV header.
pkm_long <- pokemon %>%
  pivot_longer(6:11, names_to = "Key", values_to = "Value")

# Non-legendary subset. as.logical() accepts both a logical column (which
# is what read.csv() produces for "True"/"False" text) and a character
# column holding "False"/"True", so the filter works in either case.
pkm_nonLegendary <- pokemon %>%
  filter(!as.logical(Legendary))

# Logo used as a plot annotation.
# NOTE(review): later plots also use `img_bw`, which is not created in this
# section -- it has to be defined before those plots are drawn.
img <- readPNG("pokemon.png")
img <- rasterGrob(img, interpolate = TRUE)

# Colour palettes. Each vector must exist before it is given names, and the
# names are what scale_*_manual() uses to match colours to data values.
Gen <- c("#DA4511", "#FFBD00", "#6A953F", "#9A6233", "#D3AE7C", "#307CA1")
Type <- c("#a8b820", "#705848", "#7860e0", "#f8c030", "#FFAAFF", "#a05038",
          "#f05030", "#98a8f0", "#6060b0", "#78c850", "#d0b058", "#58c8e0",
          "#a8a090", "#b058a0", "#f870a0", "#b8a058", "#a8a8c0", "#3898f8")
Stat <- c("#f5ac78", "#fae078", "#ff5959", "#9cb4f3", "#a3da8a", "#fa92b2")

names(Gen) <- 1:6
names(Type) <- levels(as.factor(pokemon$Type1))
# Key is a character column, so levels() would return NULL; the sorted
# unique values give the order a discrete scale assigns to it.
names(Stat) <- sort(unique(pkm_long$Key))

可视化

属性

# Number of Pokemon per primary type, drawn as horizontal bars.
# Uses `pokemon`, the `Type` palette and the `img` logo grob from the setup.
pokemon %>%
  count(Type1, name = "count") %>%
  # Order the types by descending count; after coord_flip() the first
  # level is drawn at the bottom of the panel.
  ggplot(aes(fct_reorder(Type1, count, .desc = TRUE), count, fill = Type1)) +
  # The outline colour belongs on the layer: passed to ggplot() it is
  # silently ignored.
  geom_col(color = "black", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 2.5) +
  coord_flip() +
  scale_fill_manual(values = Type) +
  labs(
    x = NULL, y = NULL,
    title = "Num of Pokemons by Type",
    subtitle = "All pokemons have a primary type. But not all pokemons have a secondary type."
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold")
  ) +
  # Logo placement is given in data coordinates.
  annotation_custom(img, xmin = 10, xmax = 20, ymin = 60, ymax = 112)

图1. 原生属性的宝可梦数量

第五世代引入双属性后,飞行系(Flying)的宝可梦通常都同时具备"普通"(Normal)属性,仅剩的原生的飞行系宝可梦寥寥无几。

相反,原生的水系(Water)的宝可梦数量最多。

# Number of dual-type Pokemon for every (primary, secondary) combination.
# Rows with an empty Type2 are single-type and are left out. count()
# returns an ungrouped table, so no grouping leaks into later use of
# `dual_type`.
dual_type <- pokemon %>%
  filter(Type2 != "") %>%
  count(Type1, Type2, name = "count")

# Heat map of the combination counts with the count printed in each cell.
# NOTE(review): `img_bw` is not defined in this section -- it must exist
# before this plot is drawn.
dual_type %>%
  ggplot(aes(Type1, Type2)) +
  annotation_custom(img_bw) +
  geom_raster(aes(fill = count), alpha = .9) +
  geom_text(aes(label = count)) +
  scale_fill_gradient(high = "#3f5da1", low = "#fbcb04") +
  labs(
    x = "Primary Type", y = "Secondary Type",
    title = "Type Combos",
    subtitle = "Number of dual type pokemons"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold")
  )

图2. 双属性的宝可梦数量

可以看到,同时具备飞行(Flying)和普通(Normal)属性的宝可梦数量多达 24。

能力值

# Distribution of each stat across all Pokemon: one histogram per Key.
# Uses `pkm_long` and the `Stat` palette from the setup.
# NOTE(review): `img_bw` is not defined in this section -- it must exist
# before this plot is drawn.
pkm_long %>%
  ggplot(aes(Value)) +
  annotation_custom(img_bw) +
  # bins = 30 is the value ggplot2 falls back to; stating it keeps the
  # output the same and avoids the "pick better value" message.
  geom_histogram(
    aes(fill = Key),
    bins = 30,
    color = "black",
    show.legend = FALSE,
    alpha = .9
  ) +
  facet_wrap(~Key, ncol = 3, nrow = 2) +
  scale_fill_manual(values = Stat) +
  labs(
    x = NULL, y = NULL,
    title = "Distribution of Stat - All Pokemons"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold"),
    strip.text = element_text(color = "#3f5da1", face = "bold"),
    strip.background = element_rect(color = "#3f5da1", fill = "#fbcb04", linetype = "solid")
  )

图3. 宝可梦六项能力值的分布

所有宝可梦的能力值显示,整个宝可梦世界的强弱分布相对合理,各项能力的杰出者都只占很少的部分。

# Attack against Defense for every Pokemon, with a linear fit.
# Points are coloured by generation and shaped by legendary status.
pokemon %>%
  ggplot(aes(Attack, Defense)) +
  geom_point(aes(color = as.factor(Generation), shape = Legendary)) +
  # The formula is the one ggplot2 would choose for method = "lm"; stating
  # it avoids the informational message on every render.
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_color_manual(name = "Generation", values = Gen) +
  # With parse = TRUE the label must be a plotmath string, not an
  # expression object.
  # NOTE(review): the coefficients are hard-coded, presumably taken from a
  # fit of Defense on Attack -- re-check them if the data changes.
  annotate(
    "text",
    x = 150, y = 200,
    label = "italic(y) == 0.42 * italic(x) + 40.55",
    parse = TRUE
  ) +
  labs(
    title = "The More Aggressive, the More Defensive",
    subtitle = "Linear Relationship between Attack and Defense"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  ) +
  # Logo placed just above the equation, in data coordinates.
  annotation_custom(img, xmin = 125, xmax = 175, ymin = 210, ymax = 235)

图4. 天赋异禀,攻防兼备

物理攻击力越高的宝可梦,物理防御力通常也比较强,呈现出显著的正相关。这可能是因为宝可梦进化后,通常各项能力均有成长。

# Attack against Defense again, now split into one panel per value of
# Legendary, each with its own linear fit and free axis ranges.
pokemon %>%
  ggplot(aes(Attack, Defense)) +
  geom_point(aes(color = as.factor(Generation), shape = Legendary)) +
  # The formula is the one ggplot2 would choose for method = "lm"; stating
  # it avoids the informational message on every render.
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_grid(~Legendary, scales = "free") +
  scale_color_manual(name = "Generation", values = Gen) +
  labs(
    title = "Simpson's Paradox:",
    subtitle = "Association in a Population Reverses When the Population is Divided into Subpopulations"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold"),
    strip.text = element_text(color = "#3f5da1", face = "bold"),
    strip.background = element_rect(color = "#3f5da1", fill = "#fbcb04")
  )

图5. 神兽的攻守能力更加平衡

如果只看传说宝可梦的物理攻击力和防御力,原本正相关的关系则不复存在。神兽的攻守能力可能更加均衡。

属性 x 能力值

# Mean Total per primary type as a lollipop chart, ordered by the mean.
# summarise() after a single-variable group_by() already returns an
# ungrouped table, so no explicit ungroup() is needed.
# NOTE(review): `img_bw` is not defined in this section -- it must exist
# before this plot is drawn.
pokemon %>%
  group_by(Type1) %>%
  summarise(type_mean = mean(Total)) %>%
  ggplot(aes(fct_reorder(Type1, type_mean), type_mean)) +
  annotation_custom(img_bw) +
  geom_point(
    aes(color = Type1, size = type_mean),
    alpha = .9,
    show.legend = FALSE
  ) +
  # Dashed stem from the lower axis limit (375) up to each type's mean.
  geom_segment(
    aes(xend = Type1, y = 375, yend = type_mean, color = Type1),
    linetype = "dashed",
    alpha = .5,
    show.legend = FALSE
  ) +
  coord_flip() +
  scale_color_manual(values = Type) +
  ylim(375, 575) +
  labs(
    x = NULL, y = NULL,
    title = "Dragon-Type Has the Highest Mean Total Stats"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )

图6. 能力强者,龙系宝可梦

从能力值上看,龙系(Dragon)可能是宝可梦世界最强的属性。

# Density of Total for each primary type, one panel per type in three rows.
pokemon %>%
  ggplot(aes(Total)) +
  geom_density(aes(fill = Type1), alpha = .5, show.legend = FALSE) +
  facet_wrap(~Type1, nrow = 3) +
  scale_fill_manual(values = Type) +
  labs(
    x = "Total Stats", y = "Density",
    title = "Distribution of Mean Total Stats by Primary Types"
  ) +
  # No trailing comma after the last element: an empty final argument is
  # only tolerated by some functions and is easy to break.
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )

图7. 不同属性宝可梦能力值分布

多个属性的宝可梦在能力值上表现为双峰分布,从该图也能明显看出龙系宝可梦数值上偏强的设计。

# Per primary type: how far each stat sits above or below a fixed
# reference value, drawn as horizontal bars in one panel per type.
# NOTE(review): `type_key` is not defined in this section; the plot reads
# its columns Key, mean_value, above and Type1.
# NOTE(review): 72.5 is presumably the overall mean stat value -- confirm.
ggplot(type_key, aes(x = Key, y = mean_value - 72.5)) +
  geom_col(aes(fill = above), color = "black") +
  coord_flip() +
  facet_wrap(~Type1, ncol = 6) +
  scale_fill_manual(
    name = "Above Mean Total Stats",
    values = c("Yes" = "red3", "No" = "whitesmoke")
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Characteristics of Pokemon's Stats by Each Primary Type"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(color = "black", fill = "red3"),
    strip.text = element_text(color = "whitesmoke", face = "bold"),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

图8. 各个属性宝可梦的能力值水平

所有属性的宝可梦在各项能力上,超过/低于全部宝可梦整体平均水平的数量。可以看出龙系(Dragon)宝可梦具有相对较高的物理攻击力,钢系(Steel)宝可梦具有相对较高的物理防御力。

御三家

# Starter stats drawn as bars on polar coordinates, one panel per
# generation, filled by primary type.
# NOTE(review): `starter` is not defined in this section; the plot reads
# its columns Key, Value, Type1 and Generation.
starter %>%
  ggplot(aes(Key, Value, fill = Type1)) +
  geom_col(
    color = "black",
    position = "dodge",
    show.legend = FALSE
  ) +
  coord_polar() +
  facet_wrap(~Generation) +
  scale_fill_manual(values = Type) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starter's Stats by Generation - with Evolution"
  ) +
  theme(
    panel.background = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 75, hjust = 0)
  )

图9. 六世代御三家的能力值雷达
# Stat profile of each starter: one line per Name across the stats, point
# size mapped to Evolution, one panel per generation.
# NOTE(review): `starter` is not defined in this section; the plot reads
# its columns Key, Value, Name, Type1, Evolution and Generation.
starter %>%
  ggplot(aes(Key, Value)) +
  # Logo drawn first so the data layers sit on top of it.
  annotation_custom(img, xmin = 5.5, xmax = 6.5, ymin = 20, ymax = 50) +
  geom_line(
    aes(group = Name, color = Type1),
    alpha = .5,
    show.legend = FALSE
  ) +
  geom_point(
    aes(size = Evolution, color = Type1),
    alpha = .8,
    show.legend = FALSE
  ) +
  facet_wrap(~Generation) +
  scale_color_manual(values = Type) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starter's Stats by Generation - with Evolution"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )

图10. 六世代御三家的能力值水平均值
# Starters compared with all non-legendary Pokemon of the same primary
# type (Grass, Fire, Water): jittered stat values with the starters
# overlaid as points.
# NOTE(review): `img_bw` and `starter_long` are not defined in this
# section -- both must exist before this plot is drawn.
pkm_nonLegendary %>%
  pivot_longer(6:11, names_to = "Key", values_to = "Value") %>%
  filter(Type1 %in% c("Grass", "Fire", "Water")) %>%
  ggplot(aes(Key, Value)) +
  annotation_custom(img_bw) +
  geom_jitter(aes(color = Key), alpha = .8, show.legend = FALSE) +
  geom_point(
    data = starter_long,
    aes(Key, Value, color = Generation),
    size = 2,
    shape = 20,
    alpha = .3,
    show.legend = FALSE
  ) +
  coord_flip() +
  facet_wrap(~Type1) +
  # One colour scale serves both mappings (Key for the jittered points,
  # Generation for the overlay), hence the concatenated palettes.
  scale_color_manual(values = c(Stat, Gen)) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starters' Stats in All Pokemon of its Type"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )

图11. 六世代御三家的能力值在同属性宝可梦中的表现
# Same comparison as a violin plot: stat distributions of all
# non-legendary Grass, Fire and Water Pokemon, with the starters overlaid
# as points.
# NOTE(review): `img_bw` and `starter_long` are not defined in this
# section -- both must exist before this plot is drawn.
pkm_nonLegendary %>%
  pivot_longer(6:11, names_to = "Key", values_to = "Value") %>%
  filter(Type1 %in% c("Grass", "Fire", "Water")) %>%
  ggplot(aes(Key, Value)) +
  annotation_custom(img_bw) +
  geom_violin(
    aes(fill = Key, color = Key),
    alpha = .8,
    show.legend = FALSE
  ) +
  geom_point(
    data = starter_long,
    aes(Key, Value, color = Generation),
    size = 2,
    shape = 20,
    alpha = .2,
    show.legend = FALSE
  ) +
  coord_flip() +
  facet_wrap(~Type1) +
  # The colour scale serves both Key (violins) and Generation (overlay),
  # hence the concatenated palettes; fill reuses the same vector.
  scale_color_manual(values = c(Stat, Gen)) +
  scale_fill_manual(values = c(Stat, Gen)) +
  labs(
    x = NULL, y = NULL,
    title = "Characteristics of Starters' Stats in All Pokemon of its Type"
  ) +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    plot.background = element_blank(),
    plot.title = element_text(face = "bold")
  )

图12. 六世代御三家的能力值在同属性宝可梦中的表现