Chapter 8 Down-sampled lexicase
8.1 Overview
8.2 Analysis dependencies
library(ggplot2)
library(tidyverse)
library(knitr)
library(cowplot)
library(viridis)
library(RColorBrewer)
library(rstatix)
library(ggsignif)
library(Hmisc)
library(kableExtra)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")These analyses were conducted in the following computing environment:
## _
## platform x86_64-pc-linux-gnu
## arch x86_64
## os linux-gnu
## system x86_64, linux-gnu
## status
## major 4
## minor 1.0
## year 2021
## month 05
## day 18
## svn rev 80317
## language R
## version.string R version 4.1.0 (2021-05-18)
## nickname Camp Pontanezen
8.3 Setup
data_loc <- paste0(working_directory, "data/timeseries.csv")
data <- read.csv(data_loc, na.strings="NONE")
data$cardinality <- as.factor(
data$OBJECTIVE_CNT
)
data$selection_name <- as.factor(
data$selection_name
)
data$epsilon <- as.factor(
data$LEX_EPS
)
data$proportion <- factor(
data$DSLEX_PROP,
levels=c(1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01)
)
data$proportion_label <- data$DSLEX_PROP * 100
data$proportion_label <- paste0(data$proportion_label, "%")
data$proportion_label <- factor(
data$proportion_label,
levels=c("100%", "50%", "20%", "10%", "5%" , "2%", "1%")
)
data$elite_trait_avg <-
data$ele_agg_per / data$OBJECTIVE_CNT
data$unique_start_positions_coverage <-
data$uni_str_pos / data$OBJECTIVE_CNT
final_data <- filter(data, evaluations==max(data$evaluations))
# Labeler for stats annotations
p_label <- function(p_value) {
threshold = 0.0001
if (p_value < threshold) {
return(paste0("p < ", threshold))
} else {
return(paste0("p = ", p_value))
}
}
# Significance threshold
alpha <- 0.05
####### misc #######
# Configure our default graphing theme
theme_set(theme_cowplot())8.4 Exploration diagnostic performance
elite_ave_performance_fig <- ggplot(
data,
aes(
x=evaluations,
y=elite_trait_avg,
color=proportion_label,
fill=proportion_label
)
) +
stat_summary(geom="line", fun=mean) +
stat_summary(
geom="ribbon",
fun.data="mean_cl_boot",
fun.args=list(conf.int=0.95),
alpha=0.2,
linetype=0
) +
scale_y_continuous(
name="Average trait performance",
limits=c(0, 100)
) +
scale_x_continuous(
name="Evaluations"
) +
scale_fill_brewer(
name="Sampling rate",
palette=cb_palette
) +
scale_color_brewer(
name="Sampling rate",
palette=cb_palette
)
elite_ave_performance_fig
8.4.1 Final performance
# Compute manual labels for geom_signif
stat.test <- final_data %>%
wilcox_test(elite_trait_avg ~ proportion_label) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance() %>%
add_xy_position(x="proportion_label",step.increase=1)
stat.test$manual_position <- stat.test$y.position * 1.05
stat.test$label <- mapply(p_label,stat.test$p.adj)elite_final_performance_fig <- ggplot(
final_data,
aes(
x=proportion_label,
y=elite_trait_avg,
fill=proportion_label
)
) +
geom_flat_violin(
position = position_nudge(x = .2, y = 0),
alpha = .8,
scale="width"
) +
geom_point(
mapping=aes(color=proportion_label),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
) +
geom_boxplot(
width = .1,
outlier.shape = NA,
alpha = 0.5
) +
scale_y_continuous(
name="Average trait performance",
limits=c(0, 100)
) +
scale_x_discrete(
name="Sampling rate"
) +
scale_fill_brewer(
name="Sampling rate",
palette=cb_palette
) +
scale_color_brewer(
name="Sampling rate",
palette=cb_palette
) +
theme(
legend.position="none"
)
elite_final_performance_fig
stat.test %>%
kbl() %>%
kable_styling(
bootstrap_options = c(
"striped",
"hover",
"condensed",
"responsive"
)
) %>%
scroll_box(width="600px")| .y. | group1 | group2 | n1 | n2 | statistic | p | p.adj | p.adj.signif | y.position | groups | xmin | xmax | manual_position | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| elite_trait_avg | 100% | 50% | 50 | 50 | 2500 | 0.00e+00 | 0.0000000 | **** | 184.488 | 100%, 50% | 1 | 2 | 193.7124 | p < 1e-04 |
| elite_trait_avg | 100% | 20% | 50 | 50 | 2500 | 0.00e+00 | 0.0000000 | **** | 277.224 | 100%, 20% | 1 | 3 | 291.0852 | p < 1e-04 |
| elite_trait_avg | 100% | 10% | 50 | 50 | 2500 | 0.00e+00 | 0.0000000 | **** | 369.960 | 100%, 10% | 1 | 4 | 388.4580 | p < 1e-04 |
| elite_trait_avg | 100% | 5% | 50 | 50 | 2500 | 0.00e+00 | 0.0000000 | **** | 462.696 | 100%, 5% | 1 | 5 | 485.8308 | p < 1e-04 |
| elite_trait_avg | 100% | 2% | 50 | 50 | 2500 | 0.00e+00 | 0.0000000 | **** | 555.432 | 100%, 2% | 1 | 6 | 583.2036 | p < 1e-04 |
| elite_trait_avg | 100% | 1% | 50 | 50 | 2500 | 0.00e+00 | 0.0000000 | **** | 648.168 | 100%, 1% | 1 | 7 | 680.5764 | p < 1e-04 |
| elite_trait_avg | 50% | 20% | 50 | 50 | 1344 | 5.19e-01 | 1.0000000 | ns | 740.904 | 50%, 20% | 2 | 3 | 777.9492 | p = 1 |
| elite_trait_avg | 50% | 10% | 50 | 50 | 1862 | 2.49e-05 | 0.0005229 | *** | 833.640 | 50%, 10% | 2 | 4 | 875.3220 | p = 0.0005229 |
| elite_trait_avg | 50% | 5% | 50 | 50 | 2093 | 0.00e+00 | 0.0000001 | **** | 926.376 | 50%, 5% | 2 | 5 | 972.6948 | p < 1e-04 |
| elite_trait_avg | 50% | 2% | 50 | 50 | 2167 | 0.00e+00 | 0.0000000 | **** | 1019.112 | 50%, 2% | 2 | 6 | 1070.0676 | p < 1e-04 |
| elite_trait_avg | 50% | 1% | 50 | 50 | 2199 | 0.00e+00 | 0.0000000 | **** | 1111.848 | 50%, 1% | 2 | 7 | 1167.4404 | p < 1e-04 |
| elite_trait_avg | 20% | 10% | 50 | 50 | 1623 | 1.00e-02 | 0.2100000 | ns | 1204.584 | 20%, 10% | 3 | 4 | 1264.8132 | p = 0.21 |
| elite_trait_avg | 20% | 5% | 50 | 50 | 1863 | 2.42e-05 | 0.0005082 | *** | 1297.320 | 20%, 5% | 3 | 5 | 1362.1860 | p = 0.0005082 |
| elite_trait_avg | 20% | 2% | 50 | 50 | 1974 | 6.00e-07 | 0.0000128 | **** | 1390.056 | 20%, 2% | 3 | 6 | 1459.5588 | p < 1e-04 |
| elite_trait_avg | 20% | 1% | 50 | 50 | 2064 | 0.00e+00 | 0.0000004 | **** | 1482.792 | 20%, 1% | 3 | 7 | 1556.9316 | p < 1e-04 |
| elite_trait_avg | 10% | 5% | 50 | 50 | 1673 | 4.00e-03 | 0.0840000 | ns | 1575.528 | 10%, 5% | 4 | 5 | 1654.3044 | p = 0.084 |
| elite_trait_avg | 10% | 2% | 50 | 50 | 1979 | 5.00e-07 | 0.0000107 | **** | 1668.264 | 10%, 2% | 4 | 6 | 1751.6772 | p < 1e-04 |
| elite_trait_avg | 10% | 1% | 50 | 50 | 2021 | 1.00e-07 | 0.0000023 | **** | 1761.000 | 10%, 1% | 4 | 7 | 1849.0500 | p < 1e-04 |
| elite_trait_avg | 5% | 2% | 50 | 50 | 1555 | 3.60e-02 | 0.7560000 | ns | 1853.736 | 5%, 2% | 5 | 6 | 1946.4228 | p = 0.756 |
| elite_trait_avg | 5% | 1% | 50 | 50 | 1674 | 4.00e-03 | 0.0840000 | ns | 1946.472 | 5%, 1% | 5 | 7 | 2043.7956 | p = 0.084 |
| elite_trait_avg | 2% | 1% | 50 | 50 | 1489 | 1.00e-01 | 1.0000000 | ns | 2039.208 | 2%, 1% | 6 | 7 | 2141.1684 | p = 1 |
8.5 Activation position coverage
unique_start_position_coverage_fig <- ggplot(
data,
aes(
x=evaluations,
y=unique_start_positions_coverage,
color=proportion_label,
fill=proportion_label
)
) +
stat_summary(geom="line", fun=mean) +
stat_summary(
geom="ribbon",
fun.data="mean_cl_boot",
fun.args=list(conf.int=0.95),
alpha=0.2,
linetype=0
) +
scale_y_continuous(
name="Activation position coverage",
limits=c(0.0, 1.0)
) +
scale_x_continuous(
name="Evaluations"
) +
scale_fill_brewer(
name="Sampling rate",
palette=cb_palette
) +
scale_color_brewer(
name="Sampling rate",
palette=cb_palette
)
unique_start_position_coverage_fig
8.5.1 Final activation position coverage
# Compute manual labels for geom_signif
stat.test <- final_data %>%
wilcox_test(unique_start_positions_coverage ~ proportion_label) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance() %>%
add_xy_position(x="proportion_label",step.increase=1)
stat.test$manual_position <- stat.test$y.position * 1.05
stat.test$label <- mapply(p_label,stat.test$p.adj)unique_start_positions_coverage_final_fig <- ggplot(
final_data,
aes(
x=proportion_label,
y=unique_start_positions_coverage,
fill=proportion_label
)
) +
geom_flat_violin(
position = position_nudge(x = .2, y = 0),
alpha = .8,
scale="width"
) +
geom_point(
mapping=aes(color=proportion_label),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
) +
geom_boxplot(
width = .1,
outlier.shape = NA,
alpha = 0.5
) +
scale_y_continuous(
name="Activation position coverage",
limits=c(0, 1.0)
) +
scale_x_discrete(
name="Sampling rate"
) +
scale_fill_brewer(
name="Sampling rate",
palette=cb_palette
) +
scale_color_brewer(
name="Sampling rate",
palette=cb_palette
) +
theme(
legend.position="none"
)
unique_start_positions_coverage_final_fig
stat.test %>%
kbl() %>%
kable_styling(
bootstrap_options = c(
"striped",
"hover",
"condensed",
"responsive"
)
) %>%
scroll_box(width="600px")| .y. | group1 | group2 | n1 | n2 | statistic | p | p.adj | p.adj.signif | y.position | groups | xmin | xmax | manual_position | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unique_start_positions_coverage | 100% | 50% | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 0.7300 | 100%, 50% | 1 | 2 | 0.766500 | p < 1e-04 |
| unique_start_positions_coverage | 100% | 20% | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 1.0975 | 100%, 20% | 1 | 3 | 1.152375 | p < 1e-04 |
| unique_start_positions_coverage | 100% | 10% | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 1.4650 | 100%, 10% | 1 | 4 | 1.538250 | p < 1e-04 |
| unique_start_positions_coverage | 100% | 5% | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 1.8325 | 100%, 5% | 1 | 5 | 1.924125 | p < 1e-04 |
| unique_start_positions_coverage | 100% | 2% | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 2.2000 | 100%, 2% | 1 | 6 | 2.310000 | p < 1e-04 |
| unique_start_positions_coverage | 100% | 1% | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 2.5675 | 100%, 1% | 1 | 7 | 2.695875 | p < 1e-04 |
| unique_start_positions_coverage | 50% | 20% | 50 | 50 | 607.0 | 1.00e-07 | 0.0000012 | **** | 2.9350 | 50%, 20% | 2 | 3 | 3.081750 | p < 1e-04 |
| unique_start_positions_coverage | 50% | 10% | 50 | 50 | 258.0 | 0.00e+00 | 0.0000000 | **** | 3.3025 | 50%, 10% | 2 | 4 | 3.467625 | p < 1e-04 |
| unique_start_positions_coverage | 50% | 5% | 50 | 50 | 365.5 | 0.00e+00 | 0.0000000 | **** | 3.6700 | 50%, 5% | 2 | 5 | 3.853500 | p < 1e-04 |
| unique_start_positions_coverage | 50% | 2% | 50 | 50 | 306.5 | 0.00e+00 | 0.0000000 | **** | 4.0375 | 50%, 2% | 2 | 6 | 4.239375 | p < 1e-04 |
| unique_start_positions_coverage | 50% | 1% | 50 | 50 | 175.5 | 0.00e+00 | 0.0000000 | **** | 4.4050 | 50%, 1% | 2 | 7 | 4.625250 | p < 1e-04 |
| unique_start_positions_coverage | 20% | 10% | 50 | 50 | 800.5 | 1.00e-03 | 0.0210000 |
|
4.7725 | 20%, 10% | 3 | 4 | 5.011125 | p = 0.021 |
| unique_start_positions_coverage | 20% | 5% | 50 | 50 | 944.0 | 2.90e-02 | 0.6090000 | ns | 5.1400 | 20%, 5% | 3 | 5 | 5.397000 | p = 0.609 |
| unique_start_positions_coverage | 20% | 2% | 50 | 50 | 833.0 | 3.00e-03 | 0.0630000 | ns | 5.5075 | 20%, 2% | 3 | 6 | 5.782875 | p = 0.063 |
| unique_start_positions_coverage | 20% | 1% | 50 | 50 | 678.5 | 5.63e-05 | 0.0011823 | ** | 5.8750 | 20%, 1% | 3 | 7 | 6.168750 | p = 0.0011823 |
| unique_start_positions_coverage | 10% | 5% | 50 | 50 | 1397.0 | 3.04e-01 | 1.0000000 | ns | 6.2425 | 10%, 5% | 4 | 5 | 6.554625 | p = 1 |
| unique_start_positions_coverage | 10% | 2% | 50 | 50 | 1276.5 | 8.56e-01 | 1.0000000 | ns | 6.6100 | 10%, 2% | 4 | 6 | 6.940500 | p = 1 |
| unique_start_positions_coverage | 10% | 1% | 50 | 50 | 1118.5 | 3.60e-01 | 1.0000000 | ns | 6.9775 | 10%, 1% | 4 | 7 | 7.326375 | p = 1 |
| unique_start_positions_coverage | 5% | 2% | 50 | 50 | 1128.0 | 3.94e-01 | 1.0000000 | ns | 7.3450 | 5%, 2% | 5 | 6 | 7.712250 | p = 1 |
| unique_start_positions_coverage | 5% | 1% | 50 | 50 | 973.0 | 5.30e-02 | 1.0000000 | ns | 7.7125 | 5%, 1% | 5 | 7 | 8.098125 | p = 1 |
| unique_start_positions_coverage | 2% | 1% | 50 | 50 | 1096.0 | 2.84e-01 | 1.0000000 | ns | 8.0800 | 2%, 1% | 6 | 7 | 8.484000 | p = 1 |
8.6 Manuscript figures
legend <- cowplot::get_legend(
elite_ave_performance_fig +
guides(
color=guide_legend(nrow=1),
fill=guide_legend(nrow=1)
) +
theme(
legend.position = "bottom",
legend.box="horizontal",
legend.justification="center"
)
)
grid <- plot_grid(
elite_ave_performance_fig +
ggtitle("Performance over time") +
theme(legend.position="none"),
elite_final_performance_fig +
ggtitle("Final performance") +
theme(),
unique_start_position_coverage_fig +
ggtitle("Activation position coverage over time") +
theme(legend.position="none"),
unique_start_positions_coverage_final_fig +
ggtitle("Final activation position coverage") +
theme(),
nrow=2,
ncol=2,
rel_widths=c(3,2),
labels="auto"
)
grid <- plot_grid(
grid,
legend,
nrow=2,
ncol=1,
rel_heights=c(1, 0.1)
)
save_plot(
paste(working_directory, "imgs/down-sampled-panel.pdf", sep=""),
grid,
base_width=12,
base_height=8
)
grid