Version 4: Customize
Setup
Packages
# use install.packages FIRST TIME ONLY
# NOTE(review): the first argument of update.packages() is `lib.loc` (a library
# directory), not a package source path -- confirm this call really refreshes
# irisproject from params$package_location.
update.packages(
  params$package_location,
  repos = NULL,
  type = "source"
)
library(irisproject)
library(dplyr)
library(ggplot2)
library(plotly)
Parameters
# Echo every report parameter as "name: value" so the rendered document
# records the configuration it was built with.
param_names <- names(params)
for (i in seq_along(param_names)) {
  key <- param_names[[i]]
  print(paste0(key, ': ', params[[key]]))
}
## [1] "package_location: ./irisproject/"
## [1] "convert_cm: TRUE"
## [1] "plot_pairs_of: Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Length.Ratio, Width.Ratio, Circumference, Species"
Starting Data
# Show the first rows of the untouched iris data as a formatted table,
# rounding numeric values to three digits.
head(iris) %>%
  knitr::kable(digits = 3)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
4.6 | 3.1 | 1.5 | 0.2 | setosa |
5.0 | 3.6 | 1.4 | 0.2 | setosa |
5.4 | 3.9 | 1.7 | 0.4 | setosa |
Data Manipulation
Convert Measurements
Converting the data in the first four columns from centimeters to inches.
# Only convert when the report parameter `convert_cm` is set; columns 1:4 are
# passed to convert_measurements() and the result replaces `iris`.
if (params$convert_cm) {
  iris <- iris %>%
    convert_measurements(1:4)
}
Ratio features
Computing two ratio features: ratio of lengths (sepal : petal) and ratio of widths (sepal : petal)
# Replace `iris` with the result of compute_ratios(), which supplies the
# ratio features described above.
iris <- iris %>%
  compute_ratios()
Circumference
Creating a new feature: circumference of the smallest circle that could circumscribe the flower (using the larger length as the radius).
# Replace `iris` with the result of compute_circumference(), which supplies
# the circumference feature described above.
iris <- iris %>%
  compute_circumference()
Final Data
# Show the first rows of the transformed data as a formatted table,
# rounding numeric values to three digits.
head(iris) %>%
  knitr::kable(digits = 3)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | Length.Ratio | Width.Ratio | Circumference |
---|---|---|---|---|---|---|---|
2.008 | 1.378 | 0.551 | 0.079 | setosa | 3.643 | 17.50 | 12.616 |
1.929 | 1.181 | 0.551 | 0.079 | setosa | 3.500 | 15.00 | 12.121 |
1.850 | 1.260 | 0.512 | 0.079 | setosa | 3.615 | 16.00 | 11.626 |
1.811 | 1.220 | 0.591 | 0.079 | setosa | 3.067 | 15.50 | 11.379 |
1.969 | 1.417 | 0.551 | 0.079 | setosa | 3.571 | 18.00 | 12.368 |
2.126 | 1.535 | 0.669 | 0.157 | setosa | 3.176 | 9.75 | 13.358 |
Plots
Pairs plot of select variables
# Split the comma-separated `plot_pairs_of` parameter into a character vector
# of column names, stripping the whitespace around each name.
plot_pairs_of <- strsplit(params$plot_pairs_of, ",")[[1]] %>% trimws()
Variables included:
- Sepal.Length
- Sepal.Width
- Petal.Length
- Petal.Width
- Length.Ratio
- Width.Ratio
- Circumference
- Species
# Scatterplot matrix of the columns named in `plot_pairs_of`.
pair_data <- iris[, plot_pairs_of]
pairs(pair_data)
Plot of circumference and length ratio by species
# Scatter plot of Length.Ratio against Circumference, colored by species.
# The `text` aesthetic is not drawn by geom_point(); it builds the per-point
# label that ggplotly(tooltip = "text") displays on hover.
p <- iris %>% ggplot(aes(
  x = Circumference,
  y = Length.Ratio,
  color = Species,
  text = paste0(
    "Species: ", Species,
    "\nCircumference: ", round(Circumference, 3),
    "\nLength Ratio: ", round(Length.Ratio, 3),
    "\nWidth Ratio: ", round(Width.Ratio, 3),
    "\nPetal Width, Length: ", round(Petal.Width, 3),
    ", ", round(Petal.Length, 3),
    "\nSepal Width, Length: ", round(Sepal.Width, 3),
    ", ", round(Sepal.Length, 3)
  )
)) +
  geom_point() +
  theme_classic() +
  theme(
    legend.position = 'top',
    legend.justification = 'left'
  )
# Turn the ggplot into an interactive plotly widget whose hover tooltip shows
# only the custom `text` label, then lay the legend out horizontally with its
# bottom edge anchored just above the plotting area (y = 1.02) at the left
# edge (x = 0).
plotly::ggplotly(p, tooltip = c("text")) %>%
  plotly::layout(
    legend = list(
      orientation = "h",
      x = 0,
      yanchor = "bottom",
      y = 1.02
    )
  )