【R】ggplotをキレイに

ggplotできれいなグラフを描きましょう的な備忘録です。

The Evolution of a ggplot (Ep. 1)のページを参考に勉強してみました。

データは、TidyTuesdayのcoffeeです。

library(tidyverse)
library(ggplot2)
coffee <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv')
head(coffee)
> head(coffee)
# A tibble: 6 x 43
  total_cup_points species owner country_of_orig~ farm_name lot_number mill  ico_number company altitude region
             <dbl> <chr>   <chr> <chr>            <chr>     <chr>      <chr> <chr>      <chr>   <chr>    <chr> 
1             90.6 Arabica meta~ Ethiopia         "metad p~ NA         meta~ 2014/2015  metad ~ 1950-22~ guji-~
2             89.9 Arabica meta~ Ethiopia         "metad p~ NA         meta~ 2014/2015  metad ~ 1950-22~ guji-~
3             89.8 Arabica grou~ Guatemala        "san mar~ NA         NA    NA         NA      1600 - ~ NA    
4             89   Arabica yidn~ Ethiopia         "yidneka~ NA         wole~ NA         yidnek~ 1800-22~ oromia
5             88.8 Arabica meta~ Ethiopia         "metad p~ NA         meta~ 2014/2015  metad ~ 1950-22~ guji-~
6             88.8 Arabica ji-a~ Brazil            NA       NA         NA    NA         NA      NA       NA    
# ... with 32 more variables: producer <chr>, number_of_bags <dbl>, bag_weight <chr>, in_country_partner <chr>,
#   harvest_year <chr>, grading_date <chr>, owner_1 <chr>, variety <chr>, processing_method <chr>, aroma <dbl>,
#   flavor <dbl>, aftertaste <dbl>, acidity <dbl>, body <dbl>, balance <dbl>, uniformity <dbl>, clean_cup <dbl>,
#   sweetness <dbl>, cupper_points <dbl>, moisture <dbl>, category_one_defects <dbl>, quakers <dbl>, color <chr>,
#   category_two_defects <dbl>, expiration <chr>, certification_body <chr>, certification_address <chr>,
#   certification_contact <chr>, unit_of_measurement <chr>, altitude_low_meters <dbl>, altitude_high_meters <dbl>,
#   altitude_mean_meters <dbl>

まずはふつうのボックスプロット

g <- coffee %>%
  drop_na(any_of("country_of_origin"))  %>%
  filter(aroma != 0 & !country_of_origin %in% c("Zambia", "Rwanda", "Papua New Guinea", "Japan", "Mauritius", "Cote d?Ivoire")) %>%
  ggplot(aes(x = country_of_origin, y = total_cup_points, color = country_of_origin)) +
  coord_flip() +
  labs(x = NULL, y = "Total cup points", size = 5) +
  theme(
    legend.position = "none",
    axis.title = element_text(size = 14),
    axis.text.x = element_text(family = "Roboto Mono", size = 10),
    axis.text.y = element_text(family = "Roboto Mono", size = 10),
    panel.grid = element_blank()
  )

g +
  geom_boxplot(color = "gray80", outlier.alpha = 0)

色付けるだけでもキレイに見えます。

set.seed(123)
g +
  geom_boxplot(color = "gray80", outlier.alpha = 0) +
  geom_jitter(size = 1, alpha = 0.25, width = 0.2) +
  stat_summary(fun = mean, geom = "point", size = 4)

平均の線を入れてみたりします。

coffee_avg <-
  coffee %>%
  summarize(avg = mean(total_cup_points, na.rm = T)) %>%
  pull(avg)

g + 
  geom_hline(aes(yintercept = coffee_avg), color = "gray50", size = 0.6) +
  stat_summary(fun = mean, geom = "point", size = 4) +
  geom_jitter(size = 2, alpha = 0.25, width = 0.2) 

アノテーションと矢印いれるともっとわかりやすい。調整は面倒。。。

(g_text <-
    g +
    geom_hline(aes(yintercept = coffee_avg), color = "gray50", size = 0.6) +
    stat_summary(fun = mean, geom = "point", size = 4) +
    geom_jitter(size = 1, alpha = 0.25, width = 0.2) + 
    annotate(
      "text", x = 16.3, y = 88, family = "Poppins", size = 3, color = "gray20", lineheight = .6,
      label = glue::glue("Worldwide average:\n{round(coffee_avg, 1)} ")
    ) +
    annotate(
      "text", x = 11.5, y = 72, family = "Poppins", size = 3, color = "gray20",
      label = "Each country's average"
    ) 
)


arrows <-
  tibble(
    x1 = c(15.9, 11.5),
    x2 = c(15.6, 10),
    y1 = c(85, 75),
    y2 = c(coffee_avg, 77)
  )


g_text +
  geom_curve(
    data = arrows, aes(x = x1, y = y1, xend = x2, yend = y2),
    arrow = arrow(length = unit(0.07, "inch")), size = 0.4,
    color = "gray20", curvature = -0.3
  )

データは同じなのに、最初のボックスプロットとはかなり異なった印象です。

Add a Comment

メールアドレスが公開されることはありません。