Chapter 7 Epsilon lexicase
7.1 Overview
7.2 Analysis dependencies
library(ggplot2)
library(tidyverse)
library(knitr)
library(cowplot)
library(viridis)
library(RColorBrewer)
library(rstatix)
library(ggsignif)
library(Hmisc)
library(kableExtra)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")

These analyses were conducted in the following computing environment:
## _
## platform x86_64-pc-linux-gnu
## arch x86_64
## os linux-gnu
## system x86_64, linux-gnu
## status
## major 4
## minor 1.0
## year 2021
## month 05
## day 18
## svn rev 80317
## language R
## version.string R version 4.1.0 (2021-05-18)
## nickname Camp Pontanezen
7.3 Setup
# Read the time-series results; the literal string "NONE" in the CSV is
# treated as a missing value.
data_loc <- paste0(working_directory, "data/timeseries-res-1000g.csv")
data <- read.csv(data_loc, na.strings = "NONE")

# Factor versions of the grouping columns so ggplot treats them as discrete.
data$cardinality <- as.factor(data$OBJECTIVE_CNT)
data$selection_name <- as.factor(data$selection_name)
data$epsilon <- as.factor(data$LEX_EPS)

# Both derived measures are the raw column divided by OBJECTIVE_CNT.
data$elite_trait_avg <- data$ele_agg_per / data$OBJECTIVE_CNT
data$unique_start_positions_coverage <- data$uni_str_pos / data$OBJECTIVE_CNT

# Rows recorded at the largest evaluation count, used for the "final" plots
# and statistics.
final_data <- data %>%
  filter(evaluations == max(evaluations))
# Labeler for stats annotations.
#
# Formats one p-value as a display string. Values below the reporting
# threshold (0.0001) are shown as "p < <threshold>"; all other values are
# shown as "p = <value>". A missing p-value (NA/NaN) is reported as
# "p = NA" / "p = NaN" rather than raising an error in `if`.
#
# p_value: a single numeric p-value (apply element-wise for vectors).
# Returns: a length-one character string.
p_label <- function(p_value) {
  threshold <- 0.0001
  # `if` cannot branch on a missing condition, so handle NA/NaN up front.
  if (is.na(p_value)) {
    return(paste0("p = ", p_value))
  }
  if (p_value < threshold) {
    paste0("p < ", threshold)
  } else {
    paste0("p = ", p_value)
  }
}
# Significance threshold
alpha <- 0.05
####### misc #######
# Configure our default graphing theme
theme_set(theme_cowplot())

7.4 Exploration diagnostic performance
# Mean elite trait performance per generation, one line per epsilon level,
# with a bootstrapped 95% confidence band (mean_cl_boot) drawn as a ribbon.
elite_ave_performance_fig <-
  ggplot(
    data = data,
    mapping = aes(
      x = gen,
      y = elite_trait_avg,
      color = epsilon,
      fill = epsilon
    )
  ) +
  stat_summary(fun = mean, geom = "line") +
  stat_summary(
    fun.data = "mean_cl_boot",
    fun.args = list(conf.int = 0.95),
    geom = "ribbon",
    linetype = 0, # no outline around the band
    alpha = 0.2
  ) +
  scale_y_continuous(name = "Average trait performance") +
  scale_x_continuous(name = "Generations") +
  scale_fill_brewer(name = "Epsilon", palette = cb_palette) +
  scale_color_brewer(name = "Epsilon", palette = cb_palette)

# Print the figure.
elite_ave_performance_fig
7.4.1 Final performance
# Compute manual labels for geom_signif.
# Pairwise Wilcoxon tests of final elite trait performance between epsilon
# levels with Bonferroni-adjusted p-values. add_xy_position() appends the
# bracket coordinates (y.position, xmin, xmax); `label` holds the formatted
# adjusted p-value of each comparison.
stat.test <- final_data %>%
  wilcox_test(elite_trait_avg ~ epsilon) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x = "epsilon", step.increase = 1)
# vapply guarantees a character vector, even when there are no comparisons.
stat.test$label <- vapply(stat.test$p.adj, p_label, character(1))

# Distribution of final elite trait performance per epsilon level: a half
# violin nudged to the right, jittered raw points, and a narrow boxplot.
elite_final_performance_fig <- ggplot(
  final_data,
  aes(x = epsilon, y = elite_trait_avg, fill = epsilon)
) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8,
    scale = "width"
  ) +
  geom_point(
    mapping = aes(color = epsilon),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA, # raw points are already drawn by geom_point
    alpha = 0.5
  ) +
  scale_y_continuous(
    name = "Average trait performance",
    limits = c(0, 100)
  ) +
  scale_x_discrete(
    name = "Epsilon"
  ) +
  scale_fill_brewer(
    name = "Epsilon",
    palette = cb_palette
  ) +
  scale_color_brewer(
    name = "Epsilon",
    palette = cb_palette
  ) +
  theme(
    legend.position = "none"
  )
elite_final_performance_fig
# Show the pairwise comparison results as a styled table inside a
# 600px-wide scroll box.
stat.test %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c(
      "striped",
      "hover",
      "condensed",
      "responsive"
    )
  ) %>%
  scroll_box(width = "600px")

| .y. | group1 | group2 | n1 | n2 | statistic | p | p.adj | p.adj.signif | y.position | groups | xmin | xmax | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| elite_trait_avg | 0 | 0.1 | 50 | 50 | 346 | 0.00e+00 | 0.00e+00 | **** | 192.9100 | 0 , 0.1 | 1 | 2 | p < 1e-04 |
| elite_trait_avg | 0 | 0.3 | 50 | 50 | 572 | 3.00e-06 | 8.40e-05 | **** | 290.9318 | 0 , 0.3 | 1 | 3 | p < 1e-04 |
| elite_trait_avg | 0 | 0.6 | 50 | 50 | 1013 | 1.03e-01 | 1.00e+00 | ns | 388.9536 | 0 , 0.6 | 1 | 4 | p = 1 |
| elite_trait_avg | 0 | 1.2 | 50 | 50 | 1958 | 1.10e-06 | 3.02e-05 | **** | 486.9753 | 0 , 1.2 | 1 | 5 | p < 1e-04 |
| elite_trait_avg | 0 | 2.5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 584.9971 | 0 , 2.5 | 1 | 6 | p < 1e-04 |
| elite_trait_avg | 0 | 5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 683.0189 | 0, 5 | 1 | 7 | p < 1e-04 |
| elite_trait_avg | 0 | 10 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 781.0407 | 0 , 10 | 1 | 8 | p < 1e-04 |
| elite_trait_avg | 0.1 | 0.3 | 50 | 50 | 1685 | 3.00e-03 | 8.40e-02 | ns | 879.0624 | 0.1, 0.3 | 2 | 3 | p = 0.084 |
| elite_trait_avg | 0.1 | 0.6 | 50 | 50 | 2242 | 0.00e+00 | 0.00e+00 | **** | 977.0842 | 0.1, 0.6 | 2 | 4 | p < 1e-04 |
| elite_trait_avg | 0.1 | 1.2 | 50 | 50 | 2497 | 0.00e+00 | 0.00e+00 | **** | 1075.1060 | 0.1, 1.2 | 2 | 5 | p < 1e-04 |
| elite_trait_avg | 0.1 | 2.5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 1173.1278 | 0.1, 2.5 | 2 | 6 | p < 1e-04 |
| elite_trait_avg | 0.1 | 5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 1271.1496 | 0.1, 5 | 2 | 7 | p < 1e-04 |
| elite_trait_avg | 0.1 | 10 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 1369.1713 | 0.1, 10 | 2 | 8 | p < 1e-04 |
| elite_trait_avg | 0.3 | 0.6 | 50 | 50 | 2048 | 0.00e+00 | 1.10e-06 | **** | 1467.1931 | 0.3, 0.6 | 3 | 4 | p < 1e-04 |
| elite_trait_avg | 0.3 | 1.2 | 50 | 50 | 2463 | 0.00e+00 | 0.00e+00 | **** | 1565.2149 | 0.3, 1.2 | 3 | 5 | p < 1e-04 |
| elite_trait_avg | 0.3 | 2.5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 1663.2367 | 0.3, 2.5 | 3 | 6 | p < 1e-04 |
| elite_trait_avg | 0.3 | 5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 1761.2584 | 0.3, 5 | 3 | 7 | p < 1e-04 |
| elite_trait_avg | 0.3 | 10 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 1859.2802 | 0.3, 10 | 3 | 8 | p < 1e-04 |
| elite_trait_avg | 0.6 | 1.2 | 50 | 50 | 2412 | 0.00e+00 | 0.00e+00 | **** | 1957.3020 | 0.6, 1.2 | 4 | 5 | p < 1e-04 |
| elite_trait_avg | 0.6 | 2.5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2055.3238 | 0.6, 2.5 | 4 | 6 | p < 1e-04 |
| elite_trait_avg | 0.6 | 5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2153.3456 | 0.6, 5 | 4 | 7 | p < 1e-04 |
| elite_trait_avg | 0.6 | 10 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2251.3673 | 0.6, 10 | 4 | 8 | p < 1e-04 |
| elite_trait_avg | 1.2 | 2.5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2349.3891 | 1.2, 2.5 | 5 | 6 | p < 1e-04 |
| elite_trait_avg | 1.2 | 5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2447.4109 | 1.2, 5 | 5 | 7 | p < 1e-04 |
| elite_trait_avg | 1.2 | 10 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2545.4327 | 1.2, 10 | 5 | 8 | p < 1e-04 |
| elite_trait_avg | 2.5 | 5 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2643.4544 | 2.5, 5 | 6 | 7 | p < 1e-04 |
| elite_trait_avg | 2.5 | 10 | 50 | 50 | 2500 | 0.00e+00 | 0.00e+00 | **** | 2741.4762 | 2.5, 10 | 6 | 8 | p < 1e-04 |
| elite_trait_avg | 5 | 10 | 50 | 50 | 2225 | 0.00e+00 | 0.00e+00 | **** | 2839.4980 | 5 , 10 | 7 | 8 | p < 1e-04 |
7.5 Activation position coverage
# Mean activation position coverage per generation, one line per epsilon
# level, with a bootstrapped 95% confidence band (mean_cl_boot) as a ribbon.
unique_start_position_coverage_fig <-
  ggplot(
    data = data,
    mapping = aes(
      x = gen,
      y = unique_start_positions_coverage,
      color = epsilon,
      fill = epsilon
    )
  ) +
  stat_summary(fun = mean, geom = "line") +
  stat_summary(
    fun.data = "mean_cl_boot",
    fun.args = list(conf.int = 0.95),
    geom = "ribbon",
    linetype = 0, # no outline around the band
    alpha = 0.2
  ) +
  scale_y_continuous(
    name = "Activation position coverage",
    limits = c(0.0, 1.0)
  ) +
  scale_x_continuous(name = "Generations") +
  scale_fill_brewer(name = "Epsilon", palette = cb_palette) +
  scale_color_brewer(name = "Epsilon", palette = cb_palette)

# Print the figure.
unique_start_position_coverage_fig
7.5.1 Final activation position coverage
# Compute manual labels for geom_signif.
# Pairwise Wilcoxon tests of final activation position coverage between
# epsilon levels with Bonferroni-adjusted p-values. add_xy_position() appends
# the bracket coordinates (y.position, xmin, xmax); `label` holds the
# formatted adjusted p-value of each comparison.
stat.test <- final_data %>%
  wilcox_test(unique_start_positions_coverage ~ epsilon) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x = "epsilon", step.increase = 1)
# vapply guarantees a character vector, even when there are no comparisons.
stat.test$label <- vapply(stat.test$p.adj, p_label, character(1))

# Distribution of final activation position coverage per epsilon level: a
# half violin nudged to the right, jittered raw points, and a narrow boxplot.
unique_start_positions_coverage_final_fig <- ggplot(
  final_data,
  aes(
    x = epsilon,
    y = unique_start_positions_coverage,
    fill = epsilon
  )
) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8,
    scale = "width"
  ) +
  geom_point(
    mapping = aes(color = epsilon),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA, # raw points are already drawn by geom_point
    alpha = 0.5
  ) +
  scale_y_continuous(
    name = "Activation position coverage",
    limits = c(0, 1.0)
  ) +
  scale_x_discrete(
    name = "Epsilon"
  ) +
  scale_fill_brewer(
    name = "Epsilon",
    palette = cb_palette
  ) +
  scale_color_brewer(
    name = "Epsilon",
    palette = cb_palette
  ) +
  theme(
    legend.position = "none"
  )
unique_start_positions_coverage_final_fig
# Show the pairwise comparison results as a styled table inside a
# 600px-wide scroll box.
stat.test %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c(
      "striped",
      "hover",
      "condensed",
      "responsive"
    )
  ) %>%
  scroll_box(width = "600px")

| .y. | group1 | group2 | n1 | n2 | statistic | p | p.adj | p.adj.signif | y.position | groups | xmin | xmax | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unique_start_positions_coverage | 0 | 0.1 | 50 | 50 | 2144.0 | 0.00e+00 | 0.0000000 | **** | 0.5500000 | 0 , 0.1 | 1 | 2 | p < 1e-04 |
| unique_start_positions_coverage | 0 | 0.3 | 50 | 50 | 2379.0 | 0.00e+00 | 0.0000000 | **** | 0.7262963 | 0 , 0.3 | 1 | 3 | p < 1e-04 |
| unique_start_positions_coverage | 0 | 0.6 | 50 | 50 | 2449.0 | 0.00e+00 | 0.0000000 | **** | 0.9025926 | 0 , 0.6 | 1 | 4 | p < 1e-04 |
| unique_start_positions_coverage | 0 | 1.2 | 50 | 50 | 2427.5 | 0.00e+00 | 0.0000000 | **** | 1.0788889 | 0 , 1.2 | 1 | 5 | p < 1e-04 |
| unique_start_positions_coverage | 0 | 2.5 | 50 | 50 | 2500.0 | 0.00e+00 | 0.0000000 | **** | 1.2551852 | 0 , 2.5 | 1 | 6 | p < 1e-04 |
| unique_start_positions_coverage | 0 | 5 | 50 | 50 | 2050.5 | 0.00e+00 | 0.0000009 | **** | 1.4314815 | 0, 5 | 1 | 7 | p < 1e-04 |
| unique_start_positions_coverage | 0 | 10 | 50 | 50 | 1207.5 | 7.71e-01 | 1.0000000 | ns | 1.6077778 | 0 , 10 | 1 | 8 | p = 1 |
| unique_start_positions_coverage | 0.1 | 0.3 | 50 | 50 | 1796.0 | 1.58e-04 | 0.0044240 | ** | 1.7840741 | 0.1, 0.3 | 2 | 3 | p = 0.004424 |
| unique_start_positions_coverage | 0.1 | 0.6 | 50 | 50 | 2107.0 | 0.00e+00 | 0.0000001 | **** | 1.9603704 | 0.1, 0.6 | 2 | 4 | p < 1e-04 |
| unique_start_positions_coverage | 0.1 | 1.2 | 50 | 50 | 2018.5 | 1.00e-07 | 0.0000030 | **** | 2.1366667 | 0.1, 1.2 | 2 | 5 | p < 1e-04 |
| unique_start_positions_coverage | 0.1 | 2.5 | 50 | 50 | 2465.0 | 0.00e+00 | 0.0000000 | **** | 2.3129630 | 0.1, 2.5 | 2 | 6 | p < 1e-04 |
| unique_start_positions_coverage | 0.1 | 5 | 50 | 50 | 962.0 | 4.60e-02 | 1.0000000 | ns | 2.4892593 | 0.1, 5 | 2 | 7 | p = 1 |
| unique_start_positions_coverage | 0.1 | 10 | 50 | 50 | 267.0 | 0.00e+00 | 0.0000000 | **** | 2.6655556 | 0.1, 10 | 2 | 8 | p < 1e-04 |
| unique_start_positions_coverage | 0.3 | 0.6 | 50 | 50 | 1657.5 | 5.00e-03 | 0.1400000 | ns | 2.8418519 | 0.3, 0.6 | 3 | 4 | p = 0.14 |
| unique_start_positions_coverage | 0.3 | 1.2 | 50 | 50 | 1557.0 | 3.40e-02 | 0.9520000 | ns | 3.0181481 | 0.3, 1.2 | 3 | 5 | p = 0.952 |
| unique_start_positions_coverage | 0.3 | 2.5 | 50 | 50 | 2328.5 | 0.00e+00 | 0.0000000 | **** | 3.1944444 | 0.3, 2.5 | 3 | 6 | p < 1e-04 |
| unique_start_positions_coverage | 0.3 | 5 | 50 | 50 | 457.0 | 0.00e+00 | 0.0000012 | **** | 3.3707407 | 0.3, 5 | 3 | 7 | p < 1e-04 |
| unique_start_positions_coverage | 0.3 | 10 | 50 | 50 | 66.5 | 0.00e+00 | 0.0000000 | **** | 3.5470370 | 0.3, 10 | 3 | 8 | p < 1e-04 |
| unique_start_positions_coverage | 0.6 | 1.2 | 50 | 50 | 1187.5 | 6.68e-01 | 1.0000000 | ns | 3.7233333 | 0.6, 1.2 | 4 | 5 | p = 1 |
| unique_start_positions_coverage | 0.6 | 2.5 | 50 | 50 | 2184.5 | 0.00e+00 | 0.0000000 | **** | 3.8996296 | 0.6, 2.5 | 4 | 6 | p < 1e-04 |
| unique_start_positions_coverage | 0.6 | 5 | 50 | 50 | 218.0 | 0.00e+00 | 0.0000000 | **** | 4.0759259 | 0.6, 5 | 4 | 7 | p < 1e-04 |
| unique_start_positions_coverage | 0.6 | 10 | 50 | 50 | 19.0 | 0.00e+00 | 0.0000000 | **** | 4.2522222 | 0.6, 10 | 4 | 8 | p < 1e-04 |
| unique_start_positions_coverage | 1.2 | 2.5 | 50 | 50 | 2131.5 | 0.00e+00 | 0.0000000 | **** | 4.4285185 | 1.2, 2.5 | 5 | 6 | p < 1e-04 |
| unique_start_positions_coverage | 1.2 | 5 | 50 | 50 | 289.5 | 0.00e+00 | 0.0000000 | **** | 4.6048148 | 1.2, 5 | 5 | 7 | p < 1e-04 |
| unique_start_positions_coverage | 1.2 | 10 | 50 | 50 | 22.5 | 0.00e+00 | 0.0000000 | **** | 4.7811111 | 1.2, 10 | 5 | 8 | p < 1e-04 |
| unique_start_positions_coverage | 2.5 | 5 | 50 | 50 | 4.5 | 0.00e+00 | 0.0000000 | **** | 4.9574074 | 2.5, 5 | 6 | 7 | p < 1e-04 |
| unique_start_positions_coverage | 2.5 | 10 | 50 | 50 | 0.0 | 0.00e+00 | 0.0000000 | **** | 5.1337037 | 2.5, 10 | 6 | 8 | p < 1e-04 |
| unique_start_positions_coverage | 5 | 10 | 50 | 50 | 386.5 | 0.00e+00 | 0.0000001 | **** | 5.3100000 | 5 , 10 | 7 | 8 | p < 1e-04 |
7.6 Manuscript figures
# Pull a single-row, bottom-centred legend out of the over-time performance
# figure so it can be shared by every panel.
legend <- cowplot::get_legend(
  elite_ave_performance_fig +
    guides(color = guide_legend(nrow = 1), fill = guide_legend(nrow = 1)) +
    theme(
      legend.position = "bottom",
      legend.box = "horizontal",
      legend.justification = "center"
    )
)

# 2x2 panel: over-time figures in the wider left column, final-value figures
# on the right. Per-panel legends are hidden on the over-time figures.
grid <- plot_grid(
  elite_ave_performance_fig +
    ggtitle("Performance over time") +
    theme(legend.position = "none"),
  elite_final_performance_fig +
    ggtitle("Final performance") +
    theme(),
  unique_start_position_coverage_fig +
    ggtitle("Activation position coverage over time") +
    theme(legend.position = "none"),
  unique_start_positions_coverage_final_fig +
    ggtitle("Final activation position coverage") +
    theme(),
  nrow = 2, ncol = 2,
  rel_widths = c(3, 2),
  labels = "auto"
)

# Stack the shared legend underneath the panel grid.
grid <- plot_grid(grid, legend, nrow = 2, ncol = 1, rel_heights = c(1, 0.1))

# Write the combined figure to disk, then print it.
save_plot(
  paste0(working_directory, "imgs/epsilon-panel.pdf"),
  grid,
  base_width = 12,
  base_height = 8
)
grid