【R】ヒストグラムに正規分布を重ねる方法
2021年6月29日
表題の方法がこちらに載っていたので、試してみた。
まずは、データを準備します。
# Load the tidyverse (provides %>%, dplyr verbs and ggplot2) and readr.
library(tidyverse)
library(readr)

# Download the openintro speed/gender/height data set as a tibble.
d <- read_csv(
  "https://vincentarelbundock.github.io/Rdatasets/csv/openintro/speed_gender_height.csv"
)

# Copy of the data without the gender column. The ggplot2 example further
# down uses it to draw the whole-sample histogram behind each facet.
d2 <- d %>%
  select(-gender)

# Mean and standard deviation of height for each gender. Missing heights
# are dropped; TRUE is spelled out because T is a reassignable variable.
d_summary <- d %>%
  group_by(gender) %>%
  summarise(
    height_m = mean(height, na.rm = TRUE),
    height_sd = sd(height, na.rm = TRUE)
  )
-- Column specification -------------------------------------------------------------------------------------------------
cols(
X1 = col_double(),
speed = col_double(),
gender = col_character(),
height = col_double()
)
一番目は、手堅くggplotを使う方法。
# Per-gender summary rows (mean and sd of height), looked up once instead of
# re-filtering d_summary for every argument of the curve layers.
female_stats <- filter(d_summary, gender == "female")
male_stats <- filter(d_summary, gender == "male")

# Faceted density histograms with a normal curve overlaid per gender.
# after_stat(density) replaces the deprecated ..density.. notation.
d %>%
  ggplot() +
  # Coloured histogram of each gender, scaled to density.
  geom_histogram(aes(x = height, y = after_stat(density), fill = gender)) +
  facet_wrap(~ gender) +
  # Semi-transparent histogram built from d2, which has no gender column.
  geom_histogram(
    data = d2,
    aes(x = height, y = after_stat(density)),
    alpha = .35
  ) +
  # Normal density using the female mean and sd.
  stat_function(
    data = female_stats,
    fun = dnorm,
    color = "darkred",
    args = list(mean = female_stats$height_m, sd = female_stats$height_sd)
  ) +
  # Normal density using the male mean and sd.
  stat_function(
    data = male_stats,
    fun = dnorm,
    color = "darkblue",
    args = list(mean = male_stats$height_m, sd = male_stats$height_sd)
  ) +
  # Hide the legend and all y-axis decoration.
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(
    title = "Facetted histograms with overlaid normal curves",
    caption = "The grey histograms shows the whole distribution (over) both groups, i.e. females and men"
  ) +
  scale_fill_brewer(type = "qual", palette = "Set1")
次は、ggformula というパッケージを使う方法。とっても簡単です。
# ggformula offers a formula interface for building ggplot2 graphics.
library(ggformula)

# Density of height, one panel per gender, with two fitted-distribution
# layers piped on top. The first gf_fitdistr() call leaves the distribution
# at the function's default (red); the second asks for "normal" (blue).
gf_dens(~ height | gender, data = d) %>%
  gf_fitdistr(color = "red") %>%
  gf_fitdistr(dist = "normal", color = "blue")
最後に、ggh4x というパッケージを使う方法。こちらも簡単!
# ggh4x supplies stat_theodensity() as an extra ggplot2 layer.
library(ggh4x)

# Density of height per gender, with a red curve added by
# stat_theodensity(), whose distribution is left at the function's default.
ggplot(d, aes(x = height)) +
  geom_density() +
  stat_theodensity(colour = "red") +
  facet_wrap(~ gender)