Chapter 17 MI5000: Multi-path exploration results

Here we present the results for the best performances and activation gene coverage generated by each selection scheme replicate on the multi-path exploration diagnostic with configurations presented below. For the configuration of these experiments, we execute migrations every 50 generations and there are 4 islands in a ring topology. Best performance found refers to the largest average trait score found in a given population. Note that activation gene coverage values are gathered at the population-level. Activation gene coverage refers to the count of unique activation genes in a given population; this gives us a range of integers between 0 and 100.

17.1 Analysis dependencies

library(ggplot2)
library(cowplot)
library(dplyr)
library(PupillometryR)

17.2 Truncation selection

Here we analyze how the different population structures affect truncation selection (size 8) on the multi-path exploration diagnostic.

17.2.1 Performance

17.2.1.1 Performance over time

# Summarise pop_fit_max per (Structure, Generations) for the TRUNCATION runs:
# minimum, mean and maximum, each divided by DIMENSIONALITY.
lines <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION'
  ) %>%
  group_by(Structure, Generations) %>%
  dplyr::summarise(
    min = min(pop_fit_max) / DIMENSIONALITY,
    mean = mean(pop_fit_max) / DIMENSIONALITY,
    max = max(pop_fit_max) / DIMENSIONALITY
  )

# Mean trajectory per structure with a min-max ribbon; point markers are
# drawn only at generations divisible by 2000.
ggplot(
  lines,
  aes(
    x = Generations, y = mean, group = Structure,
    fill = Structure, color = Structure, shape = Structure
  )
) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(
    data = lines %>% filter(Generations %% 2000 == 0),
    size = 2.5, stroke = 2.0, alpha = 1.0
  ) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_continuous(
    name = "Generations",
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")
  ) +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Performance over time") +
  p_theme

17.2.1.2 Best performance

Best performance found throughout the 50,000 generations.

# Distribution of the best performance (VAL where VAR == 'pop_fit_max',
# divided by DIMENSIONALITY) per population structure under truncation
# selection: half violin, jittered points and a boxplot with outliers hidden.
# coord_flip() puts Structure on the vertical axis.
mi5000_best %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    VAR == 'pop_fit_max'
  ) %>%
  ggplot(aes(
    x = Structure, y = VAL / DIMENSIONALITY,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_discrete(name = "Structure") +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Best performance') +
  p_theme +
  coord_flip()

17.2.1.2.1 Stats

Summary statistics for the best performance.

# Best-performance rows for truncation selection; `performance` is reused by
# the statistical tests that follow.
performance <- mi5000_best %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    VAR == 'pop_fit_max'
  )

# Per-structure summary of VAL (each statistic divided by DIMENSIONALITY),
# plus the row count and the number of missing values.
dplyr::summarise(
  group_by(performance, Structure),
  count = n(),
  na_cnt = sum(is.na(VAL)),
  min = min(VAL, na.rm = TRUE) / DIMENSIONALITY,
  median = median(VAL, na.rm = TRUE) / DIMENSIONALITY,
  mean = mean(VAL, na.rm = TRUE) / DIMENSIONALITY,
  max = max(VAL, na.rm = TRUE) / DIMENSIONALITY,
  IQR = IQR(VAL, na.rm = TRUE) / DIMENSIONALITY
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 EA          100      0   6     52.5  50.6 100.   45.0
## 2 IS          100      0  25.0   82.5  79.7  99.9  20.2
## 3 NMIS        100      0  23.0   82.0  79.7  99.9  20.5

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of VAL across the Structure groups.
performance %>% kruskal.test(VAL ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  VAL by Structure
## Kruskal-Wallis chi-squared = 75.468, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = performance$VAL,
  g = performance$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  performance$VAL and performance$Structure 
## 
##      EA      IS
## IS   8.2e-14 - 
## NMIS 8.7e-14 1 
## 
## P value adjustment method: bonferroni

17.2.1.3 Final performance

Performance at the end of 50,000 generations.

# Distribution of pop_fit_max / DIMENSIONALITY at generation 50000 per
# population structure under truncation selection: half violin, jittered
# points and a boxplot with outliers hidden. coord_flip() puts Structure on
# the vertical axis.
mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    Generations == 50000
  ) %>%
  ggplot(aes(
    x = Structure, y = pop_fit_max / DIMENSIONALITY,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_discrete(name = "Structure") +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Final performance') +
  p_theme +
  coord_flip()

17.2.1.3.1 Stats

Summary statistics for the final performance.

# Generation-50000 rows for truncation selection; `performance` is reused by
# the statistical tests that follow.
performance <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    Generations == 50000
  )

# Per-structure summary of pop_fit_max / DIMENSIONALITY, plus the row count
# and the number of missing values.
dplyr::summarise(
  group_by(performance, Structure),
  count = n(),
  na_cnt = sum(is.na(pop_fit_max)),
  min = min(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  median = median(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  mean = mean(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  max = max(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  IQR = IQR(pop_fit_max / DIMENSIONALITY, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 EA          100      0   6     52.5  50.6 100.   45.0
## 2 IS          100      0  25.0   82.5  79.7  99.9  20.2
## 3 NMIS        100      0  23.0   82.0  79.7  99.9  20.5

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of pop_fit_max across the Structure groups.
performance %>% kruskal.test(pop_fit_max ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_fit_max by Structure
## Kruskal-Wallis chi-squared = 75.468, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = performance$pop_fit_max,
  g = performance$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  performance$pop_fit_max and performance$Structure 
## 
##      EA      IS
## IS   8.2e-14 - 
## NMIS 8.7e-14 1 
## 
## P value adjustment method: bonferroni

17.2.2 Generation satisfactory solution found

First generation a satisfactory solution is found throughout the 50,000 generations.

# Distribution of Generations in mi5000_ssf per population structure under
# truncation selection, keeping only rows with Generations <= GENERATIONS.
# Half violin, jittered points and a boxplot with outliers hidden;
# coord_flip() puts Structure on the vertical axis.
mi5000_ssf %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    Generations <= GENERATIONS
  ) %>%
  ggplot(aes(
    x = Structure, y = Generations,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values = SHAPE) +
  scale_y_continuous(name = "Generations") +
  scale_x_discrete(name = "Structure") +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  p_theme +
  coord_flip()

17.2.2.1 Stats

Summary statistics for the first generation a satisfactory solution is found.

# Rows of mi5000_ssf for truncation selection with Generations below 60000;
# `ssf` is reused by the test that follows.
# NOTE(review): the corresponding plot filters with
# `Generations <= GENERATIONS` rather than this literal; presumably both
# select the same runs - confirm.
ssf <- mi5000_ssf %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    Generations < 60000
  )

# Per-structure summary of Generations, plus the row count and the number of
# missing values.
dplyr::summarise(
  group_by(ssf, Structure),
  count = n(),
  na_cnt = sum(is.na(Generations)),
  min = min(Generations, na.rm = TRUE),
  median = median(Generations, na.rm = TRUE),
  mean = mean(Generations, na.rm = TRUE),
  max = max(Generations, na.rm = TRUE),
  IQR = IQR(Generations, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 EA            1      0 15409  15409 15409 15409     0
## 2 IS            2      0 27080  27572 27572 28064   492
## 3 NMIS          2      0 28220  28512 28512 28804   292

Kruskal–Wallis test provides no evidence of a difference among population structures.

# Omnibus Kruskal-Wallis test of Generations across the Structure groups.
ssf %>% kruskal.test(Generations ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Generations by Structure
## Kruskal-Wallis chi-squared = 3.6, df = 2, p-value = 0.1653

17.2.3 Activation gene coverage

Activation gene coverage analysis.

17.2.3.1 Coverage over time

Activation gene coverage over time.

# Data behind the line and ribbon of the coverage plot: minimum, mean and
# maximum pop_act_cov per (Structure, Generations) for the TRUNCATION runs.
lines <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION'
  ) %>%
  group_by(Structure, Generations) %>%
  dplyr::summarise(
    min = min(pop_act_cov),
    mean = mean(pop_act_cov),
    max = max(pop_act_cov)
  )

## `summarise()` has grouped output by 'Structure'. You can override using the
## `.groups` argument.

# Mean activation gene coverage per structure with a min-max ribbon; point
# markers are drawn only at generations divisible by 2000.
ggplot(
  lines,
  aes(
    x = Generations, y = mean, group = Structure,
    fill = Structure, color = Structure, shape = Structure
  )
) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(
    data = lines %>% filter(Generations %% 2000 == 0),
    size = 1.5, stroke = 2.0, alpha = 1.0
  ) +
  scale_y_continuous(name = "Coverage") +
  scale_x_continuous(
    name = "Generations",
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")
  ) +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Activation gene coverage over time') +
  p_theme

17.2.3.2 End of 50,000 generations

Activation gene coverage in the population at the end of 50,000 generations.

# End of run: distribution of pop_act_cov at generation 50000 per population
# structure under truncation selection. Half violin, lightly jittered points
# and a boxplot with outliers hidden; coord_flip() puts Structure on the
# vertical axis.
mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    Generations == 50000
  ) %>%
  ggplot(aes(
    x = Structure, y = pop_act_cov,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.3) +
  geom_point(position = position_jitter(height = .05, width = .05), size = 1.5, alpha = 0.5) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values = SHAPE) +
  scale_y_continuous(name = "Coverage") +
  scale_x_discrete(name = "Structure") +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Final activation gene coverage') +
  p_theme +
  coord_flip()

17.2.3.2.1 Stats

Summary statistics for activation gene coverage.

# Generation-50000 rows for truncation selection; `coverage` is reused by the
# statistical tests that follow.
coverage <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TRUNCATION',
    Generations == 50000
  )

# Per-structure summary of pop_act_cov, plus the row count and the number of
# missing values.
dplyr::summarise(
  group_by(coverage, Structure),
  count = n(),
  na_cnt = sum(is.na(pop_act_cov)),
  min = min(pop_act_cov, na.rm = TRUE),
  median = median(pop_act_cov, na.rm = TRUE),
  mean = mean(pop_act_cov, na.rm = TRUE),
  max = max(pop_act_cov, na.rm = TRUE),
  IQR = IQR(pop_act_cov, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 EA          100      0     1      2  1.97     3     0
## 2 IS          100      0     1      2  2.02     3     0
## 3 NMIS        100      0     3      6  6.33     8     1

Kruskal–Wallis test provides evidence of difference in activation gene coverage among population structures.

# Omnibus Kruskal-Wallis test of pop_act_cov across the Structure groups.
coverage %>% kruskal.test(pop_act_cov ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_act_cov by Structure
## Kruskal-Wallis chi-squared = 269.84, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction on activation gene coverage.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = coverage$pop_act_cov,
  g = coverage$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  coverage$pop_act_cov and coverage$Structure 
## 
##      EA     IS    
## IS   0.15   -     
## NMIS <2e-16 <2e-16
## 
## P value adjustment method: bonferroni

17.3 Tournament selection

Here we analyze how the different population structures affect tournament selection (size 8) on the multi-path exploration diagnostic.

17.3.1 Performance

17.3.1.1 Performance over time

# Summarise pop_fit_max per (Structure, Generations) for the TOURNAMENT runs:
# minimum, mean and maximum, each divided by DIMENSIONALITY.
lines <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT'
  ) %>%
  group_by(Structure, Generations) %>%
  dplyr::summarise(
    min = min(pop_fit_max) / DIMENSIONALITY,
    mean = mean(pop_fit_max) / DIMENSIONALITY,
    max = max(pop_fit_max) / DIMENSIONALITY
  )

# Mean trajectory per structure with a min-max ribbon; point markers are
# drawn only at generations divisible by 2000.
ggplot(
  lines,
  aes(
    x = Generations, y = mean, group = Structure,
    fill = Structure, color = Structure, shape = Structure
  )
) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(
    data = lines %>% filter(Generations %% 2000 == 0),
    size = 2.5, stroke = 2.0, alpha = 1.0
  ) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_continuous(
    name = "Generations",
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")
  ) +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Performance over time") +
  p_theme

17.3.1.2 Best performance

Best performance found throughout the 50,000 generations.

# Distribution of the best performance (VAL where VAR == 'pop_fit_max',
# divided by DIMENSIONALITY) per population structure under tournament
# selection: half violin, jittered points and a boxplot with outliers hidden.
# coord_flip() puts Structure on the vertical axis.
mi5000_best %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    VAR == 'pop_fit_max'
  ) %>%
  ggplot(aes(
    x = Structure, y = VAL / DIMENSIONALITY,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_discrete(name = "Structure") +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Best performance') +
  p_theme +
  coord_flip()

17.3.1.2.1 Stats

Summary statistics for the best performance.

# Best-performance rows for tournament selection; `performance` is reused by
# the statistical tests that follow.
performance <- mi5000_best %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    VAR == 'pop_fit_max'
  )

# Per-structure summary of VAL (each statistic divided by DIMENSIONALITY),
# plus the row count and the number of missing values.
dplyr::summarise(
  group_by(performance, Structure),
  count = n(),
  na_cnt = sum(is.na(VAL)),
  min = min(VAL, na.rm = TRUE) / DIMENSIONALITY,
  median = median(VAL, na.rm = TRUE) / DIMENSIONALITY,
  mean = mean(VAL, na.rm = TRUE) / DIMENSIONALITY,
  max = max(VAL, na.rm = TRUE) / DIMENSIONALITY,
  IQR = IQR(VAL, na.rm = TRUE) / DIMENSIONALITY
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 EA          100      0   5     54.5  53.9  99.9  46.0
## 2 IS          100      0  23.0   82.9  79.5  99.8  23.2
## 3 NMIS        100      0  27.0   85.9  81.6  99.8  23.1

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of VAL across the Structure groups.
performance %>% kruskal.test(VAL ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  VAL by Structure
## Kruskal-Wallis chi-squared = 63.856, df = 2, p-value = 1.361e-14

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = performance$VAL,
  g = performance$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  performance$VAL and performance$Structure 
## 
##      EA      IS  
## IS   6.2e-11 -   
## NMIS 1.5e-12 0.37
## 
## P value adjustment method: bonferroni

17.3.1.3 Final performance

Performance at the end of 50,000 generations.

# Distribution of pop_fit_max / DIMENSIONALITY at generation 50000 per
# population structure under tournament selection: half violin, jittered
# points and a boxplot with outliers hidden. coord_flip() puts Structure on
# the vertical axis.
mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    Generations == 50000
  ) %>%
  ggplot(aes(
    x = Structure, y = pop_fit_max / DIMENSIONALITY,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_discrete(name = "Structure") +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Final performance') +
  p_theme +
  coord_flip()

17.3.1.3.1 Stats

Summary statistics for the final performance.

# Generation-50000 rows for tournament selection; `performance` is reused by
# the statistical tests that follow.
performance <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    Generations == 50000
  )

# Per-structure summary of pop_fit_max / DIMENSIONALITY, plus the row count
# and the number of missing values.
dplyr::summarise(
  group_by(performance, Structure),
  count = n(),
  na_cnt = sum(is.na(pop_fit_max)),
  min = min(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  median = median(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  mean = mean(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  max = max(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  IQR = IQR(pop_fit_max / DIMENSIONALITY, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 EA          100      0   5     54.5  53.9  99.9  46.0
## 2 IS          100      0  23.0   82.9  79.5  99.8  23.2
## 3 NMIS        100      0  27.0   85.9  81.6  99.8  23.1

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of pop_fit_max across the Structure groups.
performance %>% kruskal.test(pop_fit_max ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_fit_max by Structure
## Kruskal-Wallis chi-squared = 63.856, df = 2, p-value = 1.361e-14

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = performance$pop_fit_max,
  g = performance$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  performance$pop_fit_max and performance$Structure 
## 
##      EA      IS  
## IS   6.2e-11 -   
## NMIS 1.5e-12 0.37
## 
## P value adjustment method: bonferroni

17.3.2 Generation satisfactory solution found

First generation a satisfactory solution is found throughout the 50,000 generations.

# Distribution of Generations in mi5000_ssf per population structure under
# tournament selection, keeping only rows with Generations <= GENERATIONS.
# Half violin, jittered points and a boxplot with outliers hidden;
# coord_flip() puts Structure on the vertical axis.
mi5000_ssf %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    Generations <= GENERATIONS
  ) %>%
  ggplot(aes(
    x = Structure, y = Generations,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values = SHAPE) +
  scale_y_continuous(name = "Generations") +
  scale_x_discrete(name = "Structure") +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  p_theme +
  coord_flip()

17.3.2.1 Stats

Summary statistics for the first generation a satisfactory solution is found.

# Rows of mi5000_ssf for tournament selection with Generations below 60000;
# `ssf` is reused by the tests that follow.
# NOTE(review): the corresponding plot filters with
# `Generations <= GENERATIONS` rather than this literal; presumably both
# select the same runs - confirm.
ssf <- mi5000_ssf %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    Generations < 60000
  )

# Per-structure summary of Generations, plus the row count and the number of
# missing values.
dplyr::summarise(
  group_by(ssf, Structure),
  count = n(),
  na_cnt = sum(is.na(Generations)),
  min = min(Generations, na.rm = TRUE),
  median = median(Generations, na.rm = TRUE),
  mean = mean(Generations, na.rm = TRUE),
  max = max(Generations, na.rm = TRUE),
  IQR = IQR(Generations, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median   mean   max   IQR
##   <fct>     <int>  <int> <int>  <dbl>  <dbl> <int> <dbl>
## 1 EA            1      0 27835  27835 27835  27835    0 
## 2 IS            4      0 35249  36223 36015. 36364  445.
## 3 NMIS          9      0 36235  37567 37498. 39029  681

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of Generations across the Structure groups.
ssf %>% kruskal.test(Generations ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Generations by Structure
## Kruskal-Wallis chi-squared = 6.6444, df = 2, p-value = 0.03607

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = ssf$Generations,
  g = ssf$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  ssf$Generations and ssf$Structure 
## 
##      EA   IS  
## IS   0.60 -   
## NMIS 0.30 0.05
## 
## P value adjustment method: bonferroni

17.3.3 Activation gene coverage

Activation gene coverage analysis.

17.3.3.1 Coverage over time

Activation gene coverage over time.

# Data behind the line and ribbon of the coverage plot: minimum, mean and
# maximum pop_act_cov per (Structure, Generations) for the TOURNAMENT runs.
lines <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT'
  ) %>%
  group_by(Structure, Generations) %>%
  dplyr::summarise(
    min = min(pop_act_cov),
    mean = mean(pop_act_cov),
    max = max(pop_act_cov)
  )

## `summarise()` has grouped output by 'Structure'. You can override using the
## `.groups` argument.

# Mean activation gene coverage per structure with a min-max ribbon; point
# markers are drawn only at generations divisible by 2000.
ggplot(
  lines,
  aes(
    x = Generations, y = mean, group = Structure,
    fill = Structure, color = Structure, shape = Structure
  )
) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(
    data = lines %>% filter(Generations %% 2000 == 0),
    size = 1.5, stroke = 2.0, alpha = 1.0
  ) +
  scale_y_continuous(name = "Coverage") +
  scale_x_continuous(
    name = "Generations",
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")
  ) +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Activation gene coverage over time') +
  p_theme

17.3.3.2 End of 50,000 generations

Activation gene coverage in the population at the end of 50,000 generations.

# End of run: distribution of pop_act_cov at generation 50000 per population
# structure under tournament selection. Half violin, lightly jittered points
# and a boxplot with outliers hidden; coord_flip() puts Structure on the
# vertical axis.
mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    Generations == 50000
  ) %>%
  ggplot(aes(
    x = Structure, y = pop_act_cov,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.3) +
  geom_point(position = position_jitter(height = .05, width = .05), size = 1.5, alpha = 0.5) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values = SHAPE) +
  scale_y_continuous(name = "Coverage") +
  scale_x_discrete(name = "Structure") +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Final activation gene coverage') +
  p_theme +
  coord_flip()

17.3.3.2.1 Stats

Summary statistics for activation gene coverage.

# Generation-50000 rows for tournament selection; `coverage` is reused by the
# statistical tests that follow.
coverage <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'TOURNAMENT',
    Generations == 50000
  )

# Per-structure summary of pop_act_cov, plus the row count and the number of
# missing values.
dplyr::summarise(
  group_by(coverage, Structure),
  count = n(),
  na_cnt = sum(is.na(pop_act_cov)),
  min = min(pop_act_cov, na.rm = TRUE),
  median = median(pop_act_cov, na.rm = TRUE),
  mean = mean(pop_act_cov, na.rm = TRUE),
  max = max(pop_act_cov, na.rm = TRUE),
  IQR = IQR(pop_act_cov, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 EA          100      0     1      2  1.99     3     0
## 2 IS          100      0     1      2  2.01     3     0
## 3 NMIS        100      0     4      6  6.2      8     2

Kruskal–Wallis test provides evidence of difference in activation gene coverage among population structures.

# Omnibus Kruskal-Wallis test of pop_act_cov across the Structure groups.
coverage %>% kruskal.test(pop_act_cov ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_act_cov by Structure
## Kruskal-Wallis chi-squared = 265.43, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction on activation gene coverage.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("greater"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = coverage$pop_act_cov,
  g = coverage$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "greater"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  coverage$pop_act_cov and coverage$Structure 
## 
##      EA     IS    
## IS   0.85   -     
## NMIS <2e-16 <2e-16
## 
## P value adjustment method: bonferroni

17.4 Lexicase selection

Here we analyze how the different population structures affect standard lexicase selection on the multi-path exploration diagnostic.

17.4.1 Performance

17.4.1.1 Performance over time

# Summarise pop_fit_max per (Structure, Generations) for the LEXICASE runs:
# minimum, mean and maximum, each divided by DIMENSIONALITY.
lines <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE'
  ) %>%
  group_by(Structure, Generations) %>%
  dplyr::summarise(
    min = min(pop_fit_max) / DIMENSIONALITY,
    mean = mean(pop_fit_max) / DIMENSIONALITY,
    max = max(pop_fit_max) / DIMENSIONALITY
  )

# Mean trajectory per structure with a min-max ribbon; point markers are
# drawn only at generations divisible by 2000.
ggplot(
  lines,
  aes(
    x = Generations, y = mean, group = Structure,
    fill = Structure, color = Structure, shape = Structure
  )
) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(
    data = lines %>% filter(Generations %% 2000 == 0),
    size = 2.5, stroke = 2.0, alpha = 1.0
  ) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_continuous(
    name = "Generations",
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")
  ) +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Performance over time") +
  p_theme

17.4.1.2 Best performance

Best performance found throughout the 50,000 generations.

# Distribution of the best performance (VAL where VAR == 'pop_fit_max',
# divided by DIMENSIONALITY) per population structure under lexicase
# selection: half violin, jittered points and a boxplot with outliers hidden.
# coord_flip() puts Structure on the vertical axis.
mi5000_best %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE',
    VAR == 'pop_fit_max'
  ) %>%
  ggplot(aes(
    x = Structure, y = VAL / DIMENSIONALITY,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_discrete(name = "Structure") +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Best performance') +
  p_theme +
  coord_flip()

17.4.1.2.1 Stats

Summary statistics for the best performance.

# Best-performance rows for lexicase selection; `performance` is reused by
# the statistical tests that follow.
performance <- mi5000_best %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE',
    VAR == 'pop_fit_max'
  )
# Put the Structure levels in the order EA, NMIS, IS (presumably so the
# one-sided pairwise comparisons below run in the intended direction).
performance$Structure <- factor(performance$Structure, levels = c('EA', 'NMIS', 'IS'))

# Per-structure summary of VAL (each statistic divided by DIMENSIONALITY),
# plus the row count and the number of missing values.
dplyr::summarise(
  group_by(performance, Structure),
  count = n(),
  na_cnt = sum(is.na(VAL)),
  min = min(VAL, na.rm = TRUE) / DIMENSIONALITY,
  median = median(VAL, na.rm = TRUE) / DIMENSIONALITY,
  mean = mean(VAL, na.rm = TRUE) / DIMENSIONALITY,
  max = max(VAL, na.rm = TRUE) / DIMENSIONALITY,
  IQR = IQR(VAL, na.rm = TRUE) / DIMENSIONALITY
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 EA          100      0  85.5   93.2  92.8  98.3  4.39
## 2 NMIS        100      0  67.5   76.7  76.2  84.6  4.57
## 3 IS          100      0  66.5   76.3  76.4  85.5  6.01

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of VAL across the Structure groups.
performance %>% kruskal.test(VAL ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  VAL by Structure
## Kruskal-Wallis chi-squared = 199.33, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction.

# Post-hoc pairwise Wilcoxon rank-sum tests between structures: unpaired,
# one-sided ("less"), Bonferroni-adjusted p-values.
pairwise.wilcox.test(
  x = performance$VAL,
  g = performance$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = "less"
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  performance$VAL and performance$Structure 
## 
##      EA     NMIS
## NMIS <2e-16 -   
## IS   <2e-16 1   
## 
## P value adjustment method: bonferroni

17.4.1.3 Final performance

Performance at the end of 50,000 generations.

# Distribution of pop_fit_max / DIMENSIONALITY at generation 50000 per
# population structure under lexicase selection: half violin, jittered
# points and a boxplot with outliers hidden. coord_flip() puts Structure on
# the vertical axis.
mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE',
    Generations == 50000
  ) %>%
  ggplot(aes(
    x = Structure, y = pop_fit_max / DIMENSIONALITY,
    color = Structure, fill = Structure, shape = Structure
  )) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(name = "Average trait score") +
  scale_x_discrete(name = "Structure") +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Final performance') +
  p_theme +
  coord_flip()

17.4.1.3.1 Stats

Summary statistics for the final performance.

# Generation-50000 rows for lexicase selection; `performance` is reused by
# the statistical tests that follow.
performance <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE',
    Generations == 50000
  )
# Put the Structure levels in the order EA, NMIS, IS (presumably so the
# one-sided pairwise comparisons below run in the intended direction).
performance$Structure <- factor(performance$Structure, levels = c('EA', 'NMIS', 'IS'))

# Per-structure summary of pop_fit_max / DIMENSIONALITY, plus the row count
# and the number of missing values.
dplyr::summarise(
  group_by(performance, Structure),
  count = n(),
  na_cnt = sum(is.na(pop_fit_max)),
  min = min(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  median = median(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  mean = mean(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  max = max(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
  IQR = IQR(pop_fit_max / DIMENSIONALITY, na.rm = TRUE)
)

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 EA          100      0  78.5   90.3  90.5  98.3  4.98
## 2 NMIS        100      0  63.0   74.1  73.9  83.5  6.57
## 3 IS          100      0  58.6   73.4  73.9  85.5  6.47

Kruskal–Wallis test provides evidence of difference among population structures.

# Omnibus Kruskal-Wallis test of pop_fit_max across the Structure groups.
performance %>% kruskal.test(pop_fit_max ~ Structure, data = .)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_fit_max by Structure
## Kruskal-Wallis chi-squared = 196.97, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction.

# Post-hoc pairwise Wilcoxon rank-sum tests on pop_fit_max between structures,
# Bonferroni-adjusted. The test is one-sided ('less'): each cell of the printed
# matrix tests whether the row structure is shifted below the column structure,
# so the factor level order of performance$Structure determines the direction.
pairwise.wilcox.test(
  x = performance$pop_fit_max,
  g = performance$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = 'less'  # spelled out instead of the abbreviated 'l'
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  performance$pop_fit_max and performance$Structure 
## 
##      EA     NMIS
## NMIS <2e-16 -   
## IS   <2e-16 1   
## 
## P value adjustment method: bonferroni

17.4.2 Activation gene coverage

Activation gene coverage analysis.

17.4.2.1 Coverage over time

Activation gene coverage over time.

# Envelope data for the coverage-over-time plot: for every structure and
# generation, the minimum, mean and maximum of pop_act_cov across the lexicase
# rows of the multi-path exploration diagnostic.
lines <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE'
  ) %>%
  group_by(Structure, Generations) %>%
  dplyr::summarise(
    min = min(pop_act_cov),
    mean = mean(pop_act_cov),
    max = max(pop_act_cov)
  )

## `summarise()` has grouped output by 'Structure'. You can override using the
## `.groups` argument.

# Mean activation gene coverage per generation for each structure, with a
# shaded min-max ribbon; a point marker is drawn only every 2,000 generations
# so the shapes stay readable.
ggplot(
  data = lines,
  mapping = aes(
    x = Generations,
    y = mean,
    group = Structure,
    fill = Structure,
    color = Structure,
    shape = Structure
  )
) +
  geom_ribbon(mapping = aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(
    data = filter(lines, Generations %% 2000 == 0),
    size = 1.5,
    stroke = 2.0,
    alpha = 1.0
  ) +
  scale_y_continuous(name = "Coverage") +
  scale_x_continuous(
    name = "Generations",
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")
  ) +
  scale_shape_manual(values = SHAPE) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Activation gene coverage over time') +
  p_theme

17.4.2.2 End of 50,000 generations

Activation gene coverage in the population at the end of 50,000 generations.

# Raincloud plot (half violin, jittered points, box plot) of pop_act_cov for
# the lexicase rows of the multi-path exploration diagnostic at generation
# 50,000, with one band per population structure.
mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE',
    Generations == 50000
  ) %>%
  ggplot(
    mapping = aes(
      x = Structure,
      y = pop_act_cov,
      color = Structure,
      fill = Structure,
      shape = Structure
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    scale = 'width',
    alpha = 0.3
  ) +
  # small jitter in both directions separates points that share a value
  geom_point(
    position = position_jitter(height = .05, width = .05),
    size = 1.5,
    alpha = 0.5
  ) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values = SHAPE) +
  scale_y_continuous(name = "Coverage") +
  scale_x_discrete(name = "Structure") +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Final activation gene coverage') +
  p_theme +
  coord_flip()

17.4.2.2.1 Stats

Summary statistics for activation gene coverage.

# Lexicase rows of the multi-path exploration diagnostic at generation 50,000,
# with Structure recoded as a factor in the fixed order EA, NMIS, IS (this
# order also fixes the row/column order of the tests run on `coverage`).
coverage <- mi5000_over_time %>%
  filter(
    Diagnostic == 'MULTIPATH_EXPLORATION',
    `Selection\nScheme` == 'LEXICASE',
    Generations == 50000
  ) %>%
  mutate(Structure = factor(Structure, levels = c('EA', 'NMIS', 'IS')))

# Per-structure summary of pop_act_cov; na_cnt reports how many values are
# missing before na.rm drops them.
coverage %>%
  group_by(Structure) %>%
  dplyr::summarise(
    count  = n(),
    na_cnt = sum(is.na(pop_act_cov)),
    min    = min(pop_act_cov, na.rm = TRUE),
    median = median(pop_act_cov, na.rm = TRUE),
    mean   = mean(pop_act_cov, na.rm = TRUE),
    max    = max(pop_act_cov, na.rm = TRUE),
    IQR    = IQR(pop_act_cov, na.rm = TRUE)
  )

## # A tibble: 3 x 8
##   Structure count na_cnt   min median  mean   max   IQR
##   <fct>     <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 EA          100      0    24     30  31      42  6   
## 2 NMIS        100      0    23     30  30.4    37  3.25
## 3 IS          100      0    19     25  24.8    32  4

Kruskal–Wallis test provides evidence of a difference in activation gene coverage among population structures.

# Omnibus rank-sum test: does pop_act_cov differ across the Structure groups?
kruskal.test(
  pop_act_cov ~ Structure,
  data = coverage
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_act_cov by Structure
## Kruskal-Wallis chi-squared = 130.57, df = 2, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction on activation gene coverage.

# Post-hoc pairwise Wilcoxon rank-sum tests on pop_act_cov between structures,
# Bonferroni-adjusted. The test is one-sided ('less'): each cell of the printed
# matrix tests whether the row structure is shifted below the column structure,
# so the factor level order of coverage$Structure determines the direction.
pairwise.wilcox.test(
  x = coverage$pop_act_cov,
  g = coverage$Structure,
  p.adjust.method = "bonferroni",
  paired = FALSE,
  conf.int = FALSE,
  alternative = 'less'  # spelled out instead of the abbreviated 'l'
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  coverage$pop_act_cov and coverage$Structure 
## 
##      EA     NMIS  
## NMIS 0.81   -     
## IS   <2e-16 <2e-16
## 
## P value adjustment method: bonferroni