# list of required packages
packages = c("ggplot2", "dplyr", "readr", "ggdensity", "ggforce")

# install packages if not already installed
# requireNamespace() checks availability without attaching the package
for (package in packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
}
Reproducing MLB Baseball Savant
The goal of this tutorial is to recreate several graphics from Major League Baseball’s Savant, which utilizes Statcast data. Specifically, we will recreate three visualizations for Gerrit Cole.
This tutorial makes heavy use of dplyr and ggplot2. Documentation, including cheat sheets, can be found at:
Setup
# load required packages
library("ggplot2")
library("dplyr")
library("readr")
library("ggdensity")
library("ggforce")

# load data
# read the 2024 pitch-level CSV for Gerrit Cole directly from the web
cole2024 = read_csv("https://daviddalpiaz.github.io/cmsac-2024/data/cole2024.csv")
- Statcast Search
- Statcast Search CSV Documentation
- Statcast Data Reference | Analyzing Baseball Data with R
- saberpowers/sabRmetrics
# preview data
cole2024

# preview columns of interest
# these are the columns used by the summaries and plots below
cole2024 |>
  select(
    pitch_type,
    pitch_name,
    release_speed,
    release_spin_rate,
    plate_x,
    plate_z,
    pfx_x,
    pfx_z
  )
# calculate pitch mix and pitch summary statistics
# one row per pitch type: share of all pitches, mean velocity, mean spin rate
cole2024_stats = {
  cole2024 |>
    group_by(pitch_type, pitch_name) |>
    summarise(
      # proportion is relative to all pitches, not to the group
      prop = n() / nrow(cole2024),
      ave_velo = mean(release_speed),
      ave_spin = mean(release_spin_rate),
      .groups = "drop"
    ) |>
    # most frequently thrown pitch first
    arrange(desc(prop))
}
cole2024_stats
Pitch Velocity Distribution
Starter Code and Graphic
# starter graphic: a single density curve of release speed over all pitches
ggplot(data = cole2024, mapping = aes(x = release_speed)) +
  geom_density()
Full Code and Final Graphic
Full Solution Code
# pitch velocity distribution: one filled density per pitch type
ggplot(cole2024) +
  aes(
    x = release_speed,
    # scale the per-group count density by the total number of pitches and
    # express it as a percentage
    y = 100 * after_stat(count) / nrow(cole2024),
    fill = pitch_type
  ) +
  geom_density(alpha = 0.5) +
  theme_bw() +
  xlim(c(70, 100)) +
  scale_y_continuous(
    limits = c(0, 13),
    # breaks are named "0%", "2%", ...; presumably the names are meant to be
    # used as axis labels -- confirm against the installed ggplot2 version
    breaks = setNames(
      seq(0, 12, by = 2),
      paste0(seq(0, 12, by = 2), "%")
    )
  ) +
  xlab("Pitch Speed (mph)") +
  ylab("Frequency of Speed") +
  labs(fill = "Pitch Type")
Pitch Arsenal
Starter Code and Graphic
# get pitch types in order of frequency
# dput() prints the vector as R code so it can be pasted into the next step
cole2024_stats |>
  pull(pitch_type) |>
  dput()
# output:
# c("FF", "KC", "FC", "SL", "CH", "SI")
# make pitch types a factor ordered by frequency
# the level order is the dput() output of the frequency-sorted pitch types
cole2024$pitch_type = factor(
  cole2024$pitch_type,
  levels = c("FF", "KC", "FC", "SL", "CH", "SI")
)
# create geom to visualize the strikezone
#
# Builds a closed rectangular path that can be added to a ggplot.
#
# sz_top: y coordinate of the top edge of the zone (default 3.8)
# sz_bot: y coordinate of the bottom edge of the zone (default 1.1)
#
# Returns the layer produced by geom_path(), drawn as a thin solid black line.
geom_strikezone = function(sz_top = 3.8, sz_bot = 1.1) {
  # NOTE(review): looks like a 17-inch plate plus a ball-size allowance
  # (9 / pi) on each side -- confirm
  plate_width = 17 + 2 * (9 / pi)
  # half-widths divided by 12; presumably inches to feet to match the
  # plate_x / plate_z columns -- confirm
  sz_left = -(plate_width / 2) / 12
  sz_right = (plate_width / 2) / 12
  # five points: the first corner is repeated so the path closes
  strikezone = data.frame(
    x = c(sz_left, sz_left, sz_right, sz_right, sz_left),
    y = c(sz_bot, sz_top, sz_top, sz_bot, sz_bot)
  )
  geom_path(
    mapping = aes(.data$x, .data$y),
    data = strikezone,
    linewidth = 0.5,
    linetype = 1,
    color = "black"
  )
}
# starter graphic: pitch locations colored by pitch type, with the strikezone
cole2024 |>
  ggplot() +
  aes(x = plate_x, y = plate_z) +
  geom_point(
    aes(color = pitch_type),
    alpha = 0.6
  ) +
  geom_strikezone()
Full Code and Final Graphic
Full Solution Code
# shared base for the location graphics: fixed-aspect panels, one per pitch
# type, with no axis titles; layers are added to it afterwards
location_plot = {
  ggplot(cole2024) +
    aes(x = plate_x, y = plate_z) +
    xlim(c(-3, 3)) +
    ylim(c(-2, 6)) +
    xlab("") +
    ylab("") +
    theme_bw() +
    coord_fixed() +
    facet_wrap(vars(pitch_type))
}
# location heatmap: highest density regions per pitch type with the strikezone
location_plot +
  geom_hdr(
    method = method_kde(h = 0.75),
    probs = c(0.90, 0.70, 0.50, 0.40, 0.30, 0.20),
    show.legend = FALSE,
    # fill by region probability rather than varying transparency
    aes(fill = after_stat(probs)),
    alpha = 0.75
  ) +
  scale_fill_brewer(
    palette = "RdBu",
    direction = -1
  ) +
  geom_strikezone()
Full Solution Code
# location scatter: individual pitches per pitch type with the strikezone
location_plot +
  geom_point(
    aes(fill = pitch_type),
    alpha = 1,
    show.legend = FALSE,
    # pch 21 is a filled circle, so the fill aesthetic is what colors it
    pch = 21
  ) +
  geom_strikezone()
Movement Profile
Starter Code and Graphic
# starter graphic: pitch movement colored by pitch type
# pfx_x / pfx_z are multiplied by 12; presumably feet to inches -- confirm
cole2024 |>
  mutate(hb = 12 * pfx_x) |>
  mutate(ivb = 12 * pfx_z) |>
  ggplot() +
  aes(
    x = hb,
    y = ivb,
    colour = pitch_type
  ) +
  geom_point(alpha = 0.75)
Full Code and Final Graphic
Full Solution Code
# movement profile: a random sample of 200 pitches on a polar-style grid
# fix the seed so slice_sample() picks the same pitches on every run
set.seed(2024)
cole2024 |>
  # NOTE(review): hb is negated here, unlike the starter graphic; presumably
  # to flip the horizontal viewing perspective -- confirm
  mutate(hb = -12 * pfx_x) |>
  mutate(ivb = 12 * pfx_z) |>
  slice_sample(n = 200) |>
  ggplot() +
  aes(
    x = hb,
    y = ivb,
    colour = pitch_type
  ) +
  # thin axes through the origin
  geom_hline(yintercept = 0, linewidth = 0.1) +
  geom_vline(xintercept = 0, linewidth = 0.1) +
  # concentric reference circles at radius 6, 12, 18 and 24, alternating
  # between two line types
  geom_circle(
    data = data.frame(
      x0 = 0,
      y0 = 0,
      r = 6 * 1:4,
      lt = factor(c(2, 1, 2, 1))
    ),
    mapping = aes(
      x0 = x0,
      y0 = y0,
      r = r,
      linetype = lt
    ),
    inherit.aes = FALSE,
    linewidth = 0.25,
    show.legend = FALSE
  ) +
  geom_point(
    alpha = 1,
    size = 2
  ) +
  # radius labels placed just inside each circle, left of the origin
  geom_text(
    data = data.frame(
      x = c(-22, -16, -10, -4),
      y = 1,
      text = c("24\"", "18\"", "12\"", "6\"")
    ),
    mapping = aes(x = x, y = y, label = text),
    inherit.aes = FALSE,
    family = "mono"
  ) +
  scale_x_continuous(limits = c(-24, 24), breaks = 12 * -2:2) +
  scale_y_continuous(limits = c(-24, 24), breaks = 12 * -2:2) +
  theme_void() +
  coord_fixed() +
  xlab("Horizontal Break (Inches)") +
  ylab("Induced Vertical Break (Inches)") +
  labs(colour = "Pitch Type")