【R】ggplotをキレイに
2020年7月10日
ggplotできれいなグラフを描きましょう的な備忘録です。
The Evolution of a ggplot (Ep. 1)のページを参考に勉強してみました。
データは、TidyTuesdayのcoffeeです。
library(tidyverse) library(ggplot2) coffee <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') head(coffee)
> head(coffee)
# A tibble: 6 x 43
total_cup_points species owner country_of_orig~ farm_name lot_number mill ico_number company altitude region
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 90.6 Arabica meta~ Ethiopia "metad p~ NA meta~ 2014/2015 metad ~ 1950-22~ guji-~
2 89.9 Arabica meta~ Ethiopia "metad p~ NA meta~ 2014/2015 metad ~ 1950-22~ guji-~
3 89.8 Arabica grou~ Guatemala "san mar~ NA NA NA NA 1600 - ~ NA
4 89 Arabica yidn~ Ethiopia "yidneka~ NA wole~ NA yidnek~ 1800-22~ oromia
5 88.8 Arabica meta~ Ethiopia "metad p~ NA meta~ 2014/2015 metad ~ 1950-22~ guji-~
6 88.8 Arabica ji-a~ Brazil NA NA NA NA NA NA NA
# ... with 32 more variables: producer <chr>, number_of_bags <dbl>, bag_weight <chr>, in_country_partner <chr>,
# harvest_year <chr>, grading_date <chr>, owner_1 <chr>, variety <chr>, processing_method <chr>, aroma <dbl>,
# flavor <dbl>, aftertaste <dbl>, acidity <dbl>, body <dbl>, balance <dbl>, uniformity <dbl>, clean_cup <dbl>,
# sweetness <dbl>, cupper_points <dbl>, moisture <dbl>, category_one_defects <dbl>, quakers <dbl>, color <chr>,
# category_two_defects <dbl>, expiration <chr>, certification_body <chr>, certification_address <chr>,
# certification_contact <chr>, unit_of_measurement <chr>, altitude_low_meters <dbl>, altitude_high_meters <dbl>,
# altitude_mean_meters <dbl>
まずはふつうのボックスプロット
g <- coffee %>% drop_na(any_of("country_of_origin")) %>% filter(aroma != 0 & !country_of_origin %in% c("Zambia", "Rwanda", "Papua New Guinea", "Japan", "Mauritius", "Cote d?Ivoire")) %>% ggplot(aes(x = country_of_origin, y = total_cup_points, color = country_of_origin)) + coord_flip() + labs(x = NULL, y = "Total cup points", size = 5) + theme( legend.position = "none", axis.title = element_text(size = 14), axis.text.x = element_text(family = "Roboto Mono", size = 10), axis.text.y = element_text(family = "Roboto Mono", size = 10), panel.grid = element_blank() ) g + geom_boxplot(color = "gray80", outlier.alpha = 0)
色付けるだけでもキレイに見えます。
set.seed(123) g + geom_boxplot(color = "gray80", outlier.alpha = 0) + geom_jitter(size = 1, alpha = 0.25, width = 0.2) + stat_summary(fun = mean, geom = "point", size = 4)
平均の線を入れてみたりします。
coffee_avg <- coffee %>% summarize(avg = mean(total_cup_points, na.rm = T)) %>% pull(avg) g + geom_hline(aes(yintercept = coffee_avg), color = "gray50", size = 0.6) + stat_summary(fun = mean, geom = "point", size = 4) + geom_jitter(size = 2, alpha = 0.25, width = 0.2)
アノテーションと矢印いれるともっとわかりやすい。調整は面倒。。。
(g_text <- g + geom_hline(aes(yintercept = coffee_avg), color = "gray50", size = 0.6) + stat_summary(fun = mean, geom = "point", size = 4) + geom_jitter(size = 1, alpha = 0.25, width = 0.2) + annotate( "text", x = 16.3, y = 88, family = "Poppins", size = 3, color = "gray20", lineheight = .6, label = glue::glue("Worldwide average:\n{round(coffee_avg, 1)} ") ) + annotate( "text", x = 11.5, y = 72, family = "Poppins", size = 3, color = "gray20", label = "Each country's average" ) ) arrows <- tibble( x1 = c(15.9, 11.5), x2 = c(15.6, 10), y1 = c(85, 75), y2 = c(coffee_avg, 77) ) g_text + geom_curve( data = arrows, aes(x = x1, y = y1, xend = x2, yend = y2), arrow = arrow(length = unit(0.07, "inch")), size = 0.4, color = "gray20", curvature = -0.3 )
データは同じなのに、最初のボックスプロットとはかなり異なった印象です。