Chapter 4 Tournament selection vs Lexicase selection
4.1 Overview
4.2 Analysis dependencies
library(ggplot2)
library(tidyverse)
library(knitr)
library(cowplot)
library(viridis)
library(RColorBrewer)
library(rstatix)
library(ggsignif)
library(Hmisc)
library(kableExtra)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")These analyses were conducted in the following computing environment:
## _
## platform x86_64-pc-linux-gnu
## arch x86_64
## os linux-gnu
## system x86_64, linux-gnu
## status
## major 4
## minor 1.0
## year 2021
## month 05
## day 18
## svn rev 80317
## language R
## version.string R version 4.1.0 (2021-05-18)
## nickname Camp Pontanezen
4.3 Setup
data_loc <- paste0(
working_directory,
"data/timeseries-res-1000g.csv"
)
data <- read.csv(data_loc, na.strings="NONE")
data$selection_name <- factor(
data$selection_name,
levels=c("EpsilonLexicase", "Tournament"),
labels=c("Lexicase", "Tournament")
)
data$elite_trait_avg <-
data$ele_agg_per / data$OBJECTIVE_CNT
data$unique_start_positions_coverage <-
data$uni_str_pos / data$OBJECTIVE_CNT
final_data <- filter(data, evaluations==max(data$evaluations))
# Labeler for stats annotations
p_label <- function(p_value) {
threshold = 0.0001
if (p_value < threshold) {
return(paste0("p < ", threshold))
} else {
return(paste0("p = ", p_value))
}
}
# Significance threshold
alpha <- 0.05
# Common graph variables
performance_ylim <- 105
coverage_ylim <- 1.0
####### misc #######
# Configure our default graphing theme
theme_set(theme_cowplot())4.4 Exploration diagnostic performance
elite_ave_performance_fig <- ggplot(
data,
aes(
x=gen,
y=elite_trait_avg,
color=selection_name,
fill=selection_name
)
) +
stat_summary(geom="line", fun=mean) +
stat_summary(
geom="ribbon",
fun.data="mean_cl_boot",
fun.args=list(conf.int=0.95),
alpha=0.2,
linetype=0
) +
scale_y_continuous(
name="Average trait performance",
limits=c(0, performance_ylim)
) +
scale_x_continuous(
name="Generation"
) +
scale_fill_brewer(
name="Selection",
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
palette=cb_palette
) +
scale_color_brewer(
name="Selection",
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
palette=cb_palette
)
elite_ave_performance_fig
4.5 Final Performance
# Compute manual labels for geom_signif
stat.test <- final_data %>%
wilcox_test(elite_trait_avg ~ selection_name) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance() %>%
add_xy_position(x="selection_name",step.increase=1)
stat.test$manual_position <- stat.test$y.position * 1.05
stat.test$label <- mapply(p_label,stat.test$p.adj)elite_final_performance_fig <- ggplot(
final_data,
aes(
x=selection_name,
y=elite_trait_avg,
fill=selection_name
)
) +
geom_flat_violin(
position = position_nudge(x = .2, y = 0),
alpha = .8,
scale="width"
) +
geom_point(
mapping=aes(color=selection_name),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
) +
geom_boxplot(
width = .1,
outlier.shape = NA,
alpha = 0.5
) +
scale_y_continuous(
name="Average trait performance",
limits=c(0, performance_ylim)
) +
scale_x_discrete(
name="Selection",
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
) +
scale_fill_brewer(
name="Selection",
palette=cb_palette,
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
) +
scale_color_brewer(
name="Selection",
palette=cb_palette,
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
) +
labs(
subtitle=paste0(
"Kruskal-Wallis, ",
p_label(
signif(
kruskal.test(
formula=elite_trait_avg~selection_name,
data=final_data)$p.value,digits=4
)
)
)
) +
ggsignif::geom_signif(
data=filter(stat.test, p.adj <= alpha),
aes(
xmin=group1,
xmax=group2,
annotations=label,
y_position=manual_position
),
manual=TRUE,
inherit.aes=FALSE
) +
theme(
legend.position="none"
)## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position

stat.test %>%
kbl() %>%
kable_styling(
bootstrap_options = c(
"striped",
"hover",
"condensed",
"responsive"
)
) %>%
scroll_box(width="600px")| .y. | group1 | group2 | n1 | n2 | statistic | p | p.adj | p.adj.signif | y.position | groups | xmin | xmax | manual_position | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| elite_trait_avg | Lexicase | Tournament | 50 | 50 | 2357 | 0 | 0 | **** | 97.477 | Lexicase , Tournament | 1 | 2 | 102.3508 | p < 1e-04 |
4.6 Activation position coverage
ggplot(data, aes(x=gen, y=uni_str_pos, color=selection_name)) +
stat_summary(geom="line", fun=mean) +
stat_summary(
geom="ribbon",
fun.data="mean_cl_boot",
fun.args=list(conf.int=0.95),
alpha=0.2,
linetype=0
) +
scale_y_continuous(
name="Unique activation positions (population)",
limits=c(0, 100)
) +
scale_x_continuous(
name="Generation"
)
Different cardinalities have numbers of possible starting positions, so next, we look at the proportion of starting positions (out of all possible) maintained by populations.
unique_start_position_coverage_fig <- ggplot(
data,
aes(
x=gen,
y=unique_start_positions_coverage,
color=selection_name,
fill=selection_name
)
) +
stat_summary(geom="line", fun=mean) +
stat_summary(
geom="ribbon",
fun.data="mean_cl_boot",
fun.args=list(conf.int=0.95),
alpha=0.2,
linetype=0
) +
scale_y_continuous(
name="Activation position coverage",
limit=c(0, coverage_ylim)
) +
scale_x_continuous(
name="Generation"
) +
scale_fill_brewer(
name="Selection",
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
palette=cb_palette
) +
scale_color_brewer(
name="Selection",
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
palette=cb_palette
)
unique_start_position_coverage_fig
4.6.1 Final activation position Coverage
# Compute manual labels for geom_signif
stat.test <- final_data %>%
wilcox_test(unique_start_positions_coverage ~ selection_name) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance() %>%
add_xy_position(x="selection_name",step.increase=1)
stat.test$manual_position <- stat.test$y.position * 1.05
stat.test$label <- mapply(p_label,stat.test$p.adj)unique_start_positions_coverage_final_fig <- ggplot(
final_data,
aes(
x=selection_name,
y=unique_start_positions_coverage,
fill=selection_name
)
) +
geom_flat_violin(
position = position_nudge(x = .2, y = 0),
alpha = .8,
scale="width"
) +
geom_point(
mapping=aes(color=selection_name),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
) +
geom_boxplot(
width = .1,
outlier.shape = NA,
alpha = 0.5
) +
scale_y_continuous(
name="Activation position coverage",
limits=c(0, coverage_ylim)
) +
scale_x_discrete(
name="Selection",
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
) +
scale_fill_brewer(
name="Selection",
palette=cb_palette,
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
) +
scale_color_brewer(
name="Selection",
palette=cb_palette,
limits=c("Lexicase", "Tournament"),
labels=c("Lexicase", "Tournament"),
) +
labs(
subtitle=paste0(
"Kruskal-Wallis, ",
p_label(
signif(
kruskal.test(
formula=unique_start_positions_coverage~selection_name,
data=final_data)$p.value,digits=4
)
)
)
) +
ggsignif::geom_signif(
data=filter(stat.test, p.adj <= alpha),
aes(
xmin=group1,
xmax=group2,
annotations=label,
y_position=manual_position
),
manual=TRUE,
inherit.aes=FALSE
) +
theme(
legend.position="none"
)## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position

stat.test %>%
kbl() %>%
kable_styling(
bootstrap_options = c(
"striped",
"hover",
"condensed",
"responsive"
)
) %>%
scroll_box(width="600px")| .y. | group1 | group2 | n1 | n2 | statistic | p | p.adj | p.adj.signif | y.position | groups | xmin | xmax | manual_position | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unique_start_positions_coverage | Lexicase | Tournament | 50 | 50 | 2500 | 0 | 0 | **** | 0.7 | Lexicase , Tournament | 1 | 2 | 0.735 | p < 1e-04 |
4.7 Manuscript figures
legend <- cowplot::get_legend(
elite_ave_performance_fig +
guides(
color=guide_legend(nrow=1),
fill=guide_legend(nrow=1)
) +
theme(
legend.position = "bottom",
legend.box="horizontal",
legend.justification="center"
)
)
grid <- plot_grid(
elite_ave_performance_fig +
ggtitle("Performance over time") +
labs(subtitle="") +
theme(legend.position="none"),
elite_final_performance_fig +
ggtitle("Final performance") +
theme(),
unique_start_position_coverage_fig +
ggtitle("Activation position coverage over time") +
labs(subtitle="") +
theme(legend.position="none"),
unique_start_positions_coverage_final_fig +
ggtitle("Final activation position coverage") +
theme(),
nrow=2,
ncol=2,
rel_widths=c(3,2),
labels="auto"
)
grid <- plot_grid(
grid,
legend,
nrow=2,
ncol=1,
rel_heights=c(1, 0.1)
)
save_plot(
paste(
working_directory,
"imgs/tournament-vs-lexicase-panel.pdf",
sep=""
),
grid,
base_width=12,
base_height=8
)
grid