# Install mosaic only when it is not already available, so that re-running
# this script does not re-download the package (or fail when offline).
if (!requireNamespace("mosaic", quietly = TRUE)) {
  install.packages("mosaic")
}
# Read the 2019 home-run data set directly from the hosted CSV file.
HomeRun <- read.csv(
  file = "https://sullystats.github.io/Statistics6e/Data/CMC3_2020/HomeRuns_2019_traditional.csv"
)

# Preview the first four rows of the data.
head(HomeRun, n = 4)
## Player Exit_Velocity Launch_Angle Pitch_Speed Distance Opposing_Team
## 1 Nomar Mazara 109.7 27 94.7 505 CWS
## 2 Miguel Sano 113.5 25 92.3 496 CWS
## 3 Pete Alonso 110.9 30 86.6 489 MIN
## 4 Rangel Ravelo 111.5 26 84.1 487 COL
library(mosaic)
# Histogram of every home-run distance in the data set, using 10-foot bins.
gf_histogram(
  ~ Distance,
  data = HomeRun,
  binwidth = 10,
  color = "black",
  fill = "blue",
  xlab = "Distance (in feet)",
  ylab = "Frequency",
  title = "Distance of a Home Run in 2019"
)

# Numerical summary (five-number summary, mean, sd, n, missing) of Distance.
favstats(~ Distance, data = HomeRun)
## min Q1 median Q3 max mean sd n missing
## 307 382 401 418 505 400.3023 26.01616 6653 0
# Draw one random sample of 9 rows from HomeRun and report its mean Distance.
mean(
  ~ Distance,
  data = sample(HomeRun, size = 9)
)
## [1] 400.2222
# Approximate the sampling distribution of the sample mean for n = 9:
# repeat "draw 9 rows, compute mean Distance" 5000 times.
# do() * ... already collects the repeated results into a data frame, so no
# extra bind_rows() wrapper is needed. The explicit name in c(mean = ...)
# fixes the result column name to `mean`.
SamplingDist <- do(5000) * c(
  mean = mean(~ Distance, data = sample(HomeRun, size = 9))
)

# Preview the first four simulated sample means.
head(SamplingDist, n = 4)
## mean
## 1 403.5556
## 2 397.8889
## 3 408.6667
## 4 387.7778
# Histogram of the simulated sample means stored in SamplingDist, 5-foot bins.
gf_histogram(
  ~ mean,
  data = SamplingDist,
  binwidth = 5,
  color = "black",
  fill = "blue",
  xlab = "Mean Distance (in feet)",
  ylab = "Frequency",
  title = "Distribution of the Sample Mean Distance of a Home Run in 2019 with n = 9"
)