Chapter 6 Truncation selection

We present the results from our parameter sweeep on truncation selection. 50 replicates are conducted for each truncation size T parameter value explored.

library(ggplot2)
library(cowplot)
library(dplyr)
library(PupillometryR)

6.1 Exploitation rate results

Here we present the results for best performances found by each truncation selection value replicate on the exploitation rate diagnostic.

6.1.1 Performance over time

Performance over time.

lines = filter(tru_ot, diagnostic == 'exploitation_rate') %>%
        group_by(T, gen) %>%
          dplyr::summarise(
            min = min(pop_fit_max),
            mean = mean(pop_fit_max),
            max = max(pop_fit_max)
          )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean / DIMENSIONALITY, group = T, fill = T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min / DIMENSIONALITY, ymax = max / DIMENSIONALITY), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Best performance over time") +
  p_theme

6.1.2 Generation satisfactory solution found

The first Generations a satisfactory solution is found throughout the 50,000 generations.

filter(tru_ssf, Diagnostic == 'EXPLOITATION_RATE') %>%
ggplot(., aes(x = T, y = Generations, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values=SHAPE)+
  scale_y_continuous(
    name="Generation",
    limits=c(0, 12000),
    breaks=c(0, 2000, 4000, 6000, 8000, 10000, 12000),
    labels=c("0e+4", "2e+4", "4e+4", "6e+4", "8e+4", "10e+4", "12e+4")
  ) +
  scale_x_discrete(
    name="T"
  ) +
  ggtitle("Generation satisfactory solution found") +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  p_theme

6.1.2.1 Stats

Summary statistics for the best performance found throughout 50,000 generations.

ssf = filter(tru_ssf, Diagnostic == 'EXPLOITATION_RATE')
group_by(ssf, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(Generations)),
    min = min(Generations, na.rm = TRUE),
    median = median(Generations, na.rm = TRUE),
    mean = mean(Generations, na.rm = TRUE),
    max = max(Generations, na.rm = TRUE),
    IQR = IQR(Generations, na.rm = TRUE)
  )

## # A tibble: 8 x 8
##   T     count na_cnt   min median   mean   max   IQR
##   <fct> <int>  <int> <int>  <dbl>  <dbl> <int> <dbl>
## 1 2        50      0  2887  2912   2912.  2955  18.2
## 2 4        50      0  3091  3125   3126.  3171  19  
## 3 8        50      0  3357  3420   3421.  3481  34.2
## 4 16       50      0  3781  3834.  3833.  3873  20.8
## 5 32       50      0  4344  4396.  4396.  4450  41.2
## 6 64       50      0  5211  5256.  5259.  5322  38  
## 7 128      50      0  6675  6773   6772.  6861  62  
## 8 256      50      0 10250 10368. 10369. 10492  73.2

Kruskal–Wallis test provides evidence of significant differences among the Generations a satisfactory solution is first found.

kruskal.test(Generations ~ T,data = ssf)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Generations by T
## Kruskal-Wallis chi-squared = 392.77, df = 7, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction on the Generations a satisfactory solution is first found. .

pairwise.wilcox.test(x = ssf$Generations, g = ssf$T , p.adjust.method = "bonferroni",
                     paired = FALSE, conf.int = FALSE, alternative = 'g')

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  ssf$Generations and ssf$T 
## 
##     2      4      8      16     32     64     128   
## 4   <2e-16 -      -      -      -      -      -     
## 8   <2e-16 <2e-16 -      -      -      -      -     
## 16  <2e-16 <2e-16 <2e-16 -      -      -      -     
## 32  <2e-16 <2e-16 <2e-16 <2e-16 -      -      -     
## 64  <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 -      -     
## 128 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 -     
## 256 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
## 
## P value adjustment method: bonferroni

6.1.3 Multi-valley crossing

6.1.3.1 Performance over time

# data for lines and shading on plots
lines = filter(tru_ot_mvc, diagnostic == 'exploitation_rate') %>%
  group_by(T, gen) %>%
  dplyr::summarise(
    min = min(pop_fit_max) / DIMENSIONALITY,
    mean = mean(pop_fit_max) / DIMENSIONALITY,
    max = max(pop_fit_max) / DIMENSIONALITY
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Performance over time')+
  p_theme +
  guides(
    shape=guide_legend(nrow=2, title.position = "left"),
    color=guide_legend(nrow=2, title.position = "left"),
    fill=guide_legend(nrow=2, title.position = "left")
  )

6.1.3.2 Performance comparison

Best performances in the population at 40,000 and 50,000 generations.

# 80% and final generation comparison
end = filter(tru_ot_mvc, diagnostic == 'exploitation_rate' & gen == 50000 & T != 'ran')
end$Generation <- factor(end$gen)

mid = filter(tru_ot_mvc, diagnostic == 'exploitation_rate' & gen == 40000 & T != 'ran')
mid$Generation <- factor(mid$gen)

mvc_p = ggplot(mid, aes(x = T, y=pop_fit_max / DIMENSIONALITY, group = T, shape = Generation)) +
  geom_point(col = mvc_col[1] , position = position_jitternudge(jitter.width = .03, nudge.x = -0.05), size = 2, alpha = 1.0) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0), lwd = 0.7, col = mvc_col[1], fill = mvc_col[1], width = .1, outlier.shape = NA, alpha = 0.0) +

  geom_point(data = end, aes(x = T, y=pop_fit_max / DIMENSIONALITY), col = mvc_col[2], position = position_jitternudge(jitter.width = .03, nudge.x = 0.05), size = 2, alpha = 1.0) +
  geom_boxplot(data = end, aes(x = T, y=pop_fit_max / DIMENSIONALITY), position = position_nudge(x = .15, y = 0), lwd = 0.7, col = mvc_col[2], fill = mvc_col[2], width = .1, outlier.shape = NA, alpha = 0.0) +

  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=c(0,1))+
  scale_colour_manual(values = c(mvc_col[1],mvc_col[2])) +
  p_theme

plot_grid(
  mvc_p +
    ggtitle("Performance comparisons") +
    theme(legend.position="none"),
  legend,
  nrow=2,
  rel_heights = c(1,.05),
  label_size = TSIZE
)

6.1.3.3 Stats

Summary statistics for the performance of the best performance at 40,000 and 50,000 generations.

slices = filter(tru_ot_mvc, diagnostic == 'exploitation_rate' & (gen == 50000 | gen == 40000))
slices$Generation <- factor(slices$gen, levels = c(50000,40000))
slices %>%
  group_by(T, Generation) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(pop_fit_max  / DIMENSIONALITY)),
    min = min(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    median = median(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    mean = mean(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    max = max(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    IQR = IQR(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

## # A tibble: 18 x 9
## # Groups:   T [9]
##    T     Generation count na_cnt   min median  mean   max    IQR
##    <fct> <fct>      <int>  <int> <dbl>  <dbl> <dbl> <dbl>  <dbl>
##  1 1     50000         50      0  17.8   18.1  18.1  18.4 0.135 
##  2 1     40000         50      0  17.6   17.9  17.9  18.1 0.140 
##  3 2     50000         50      0  17.9   18.0  18.0  18.3 0.127 
##  4 2     40000         50      0  17.6   17.9  17.9  18.1 0.0875
##  5 4     50000         50      0  17.8   18.1  18.1  18.3 0.122 
##  6 4     40000         50      0  17.7   17.9  17.9  18.2 0.138 
##  7 8     50000         50      0  17.8   18.0  18.0  18.2 0.118 
##  8 8     40000         50      0  17.7   17.9  17.9  18.1 0.147 
##  9 16    50000         50      0  17.9   18.1  18.1  18.4 0.148 
## 10 16    40000         50      0  17.7   18.0  18.0  18.3 0.137 
## 11 32    50000         50      0  17.9   18.1  18.1  18.3 0.106 
## 12 32    40000         50      0  17.7   17.9  17.9  18.1 0.0900
## 13 64    50000         50      0  17.8   18.1  18.1  18.3 0.147 
## 14 64    40000         50      0  17.7   17.9  17.9  18.2 0.192 
## 15 128   50000         50      0  17.8   18.1  18.1  18.2 0.140 
## 16 128   40000         50      0  17.7   17.9  17.9  18.2 0.145 
## 17 256   50000         50      0  18.0   18.1  18.2  18.5 0.162 
## 18 256   40000         50      0  17.8   18.1  18.1  18.5 0.173

T 2

wilcox.test(x = filter(slices, T == 2 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 2 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 2 & Generation == 50000)$pop_fit_max and filter(slices, T == 2 & Generation == 40000)$pop_fit_max
## W = 2109.5, p-value = 3.13e-09
## alternative hypothesis: true location shift is not equal to 0

T 4

wilcox.test(x = filter(slices, T == 4 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 4 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 4 & Generation == 50000)$pop_fit_max and filter(slices, T == 4 & Generation == 40000)$pop_fit_max
## W = 2003.5, p-value = 2.067e-07
## alternative hypothesis: true location shift is not equal to 0

T 8

wilcox.test(x = filter(slices, T == 8 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 8 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 8 & Generation == 50000)$pop_fit_max and filter(slices, T == 8 & Generation == 40000)$pop_fit_max
## W = 2037.5, p-value = 5.705e-08
## alternative hypothesis: true location shift is not equal to 0

T 16

wilcox.test(x = filter(slices, T == 16 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 16 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 16 & Generation == 50000)$pop_fit_max and filter(slices, T == 16 & Generation == 40000)$pop_fit_max
## W = 1998.5, p-value = 2.457e-07
## alternative hypothesis: true location shift is not equal to 0

T 32

wilcox.test(x = filter(slices, T == 32 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 32 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 32 & Generation == 50000)$pop_fit_max and filter(slices, T == 32 & Generation == 40000)$pop_fit_max
## W = 2151, p-value = 5.311e-10
## alternative hypothesis: true location shift is not equal to 0

T 64

wilcox.test(x = filter(slices, T == 64 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 64 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 64 & Generation == 50000)$pop_fit_max and filter(slices, T == 64 & Generation == 40000)$pop_fit_max
## W = 1997, p-value = 2.628e-07
## alternative hypothesis: true location shift is not equal to 0

T 128

wilcox.test(x = filter(slices, T == 128 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 128 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 128 & Generation == 50000)$pop_fit_max and filter(slices, T == 128 & Generation == 40000)$pop_fit_max
## W = 2022.5, p-value = 1.009e-07
## alternative hypothesis: true location shift is not equal to 0

T 256

wilcox.test(x = filter(slices, T == 256 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 256 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 256 & Generation == 50000)$pop_fit_max and filter(slices, T == 256 & Generation == 40000)$pop_fit_max
## W = 1742, p-value = 0.0007032
## alternative hypothesis: true location shift is not equal to 0

6.2 Ordered exploitation results

Here we present the results for best performances found by each truncation selection size value replicate on the ordered exploitation diagnostic. Best performance found refers to the largest average trait score found in a given population. Note that performance values fall between 0 and 100.

6.2.1 Performance over time

Performance over time.

lines = filter(tru_ot, diagnostic == 'ordered_exploitation') %>%
        group_by(T, gen) %>%
          dplyr::summarise(
            min = min(pop_fit_max),
            mean = mean(pop_fit_max),
            max = max(pop_fit_max)
          )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean / DIMENSIONALITY, group = T, fill = T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min / DIMENSIONALITY, ymax = max / DIMENSIONALITY), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Average trait score",
    limits=c(-1, 101),
    breaks=seq(0,100, 20),
    labels=c("0", "20", "40", "60", "80", "100")
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Best performance over time") +
  p_theme

6.2.2 Generation satisfactory solution found

The first Generations a satisfactory solution is found throughout the 50,000 generations.

filter(tru_ssf, Diagnostic == 'ORDERED_EXPLOITATION') %>%
ggplot(., aes(x = T, y = Generations, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_shape_manual(values=SHAPE)+
  scale_y_continuous(
    name="Generation",
    limits=c(0, 60000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000, 60000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4", "Fail")
  ) +
  scale_x_discrete(
    name="T"
  ) +
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Generation satisfactory solution found") +
  p_theme

## Warning: Removed 26 rows containing missing values (`geom_point()`).

6.2.2.1 Stats

Summary statistics for the first Generations a satisfactory solution is found throughout the 50,000 generations.

ssf = filter(tru_ssf, Diagnostic == 'ORDERED_EXPLOITATION')
group_by(ssf, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(Generations)),
    min = min(Generations, na.rm = TRUE),
    median = median(Generations, na.rm = TRUE),
    mean = mean(Generations, na.rm = TRUE),
    max = max(Generations, na.rm = TRUE),
    IQR = IQR(Generations, na.rm = TRUE)
  )

## # A tibble: 8 x 8
##   T     count na_cnt   min median   mean   max   IQR
##   <fct> <int>  <int> <int>  <dbl>  <dbl> <int> <dbl>
## 1 2        50      0 11664 12356. 12407. 12992  496.
## 2 4        50      0 13096 13871  13840. 14459  496.
## 3 8        50      0 14701 15466. 15511. 16280  422.
## 4 16       50      0 16002 17192  17098. 18174  813.
## 5 32       50      0 19392 20223  20274. 21055  555.
## 6 64       50      0 24348 25568. 25598. 27268  764.
## 7 128      50      0 36490 38016  37967. 39959 1210.
## 8 256      50      0 60000 60000  60000  60000    0

Kruskal–Wallis test provides evidence of significant differences among the first Generations a satisfactory solution is found throughout the 50,000 generations.

kruskal.test(Generations ~ T, data = ssf)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Generations by T
## Kruskal-Wallis chi-squared = 393.49, df = 7, p-value < 2.2e-16

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction on the first Generations a satisfactory solution is found throughout the 50,000 generations.

pairwise.wilcox.test(x = ssf$Generations, g = ssf$T , p.adjust.method = "bonferroni",
                     paired = FALSE, conf.int = FALSE, alternative = 'g')

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  ssf$Generations and ssf$T 
## 
##     2      4      8      16     32     64     128   
## 4   <2e-16 -      -      -      -      -      -     
## 8   <2e-16 <2e-16 -      -      -      -      -     
## 16  <2e-16 <2e-16 <2e-16 -      -      -      -     
## 32  <2e-16 <2e-16 <2e-16 <2e-16 -      -      -     
## 64  <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 -      -     
## 128 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 -     
## 256 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
## 
## P value adjustment method: bonferroni

6.2.3 Multi-valley crossing

6.2.3.1 Performance over time

# data for lines and shading on plots
lines = filter(tru_ot_mvc, diagnostic == 'ordered_exploitation') %>%
  group_by(T, gen) %>%
  dplyr::summarise(
    min = min(pop_fit_max) / DIMENSIONALITY,
    mean = mean(pop_fit_max) / DIMENSIONALITY,
    max = max(pop_fit_max) / DIMENSIONALITY
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Performance over time')+
  p_theme +
  guides(
    shape=guide_legend(nrow=2, title.position = "left"),
    color=guide_legend(nrow=2, title.position = "left"),
    fill=guide_legend(nrow=2, title.position = "left")
  )

6.2.3.2 Performance comparison

Best performances in the population at 40,000 and 50,000 generations.

# 80% and final generation comparison
end = filter(tru_ot_mvc, diagnostic == 'ordered_exploitation' & gen == 50000 & T != 'ran')
end$Generation <- factor(end$gen)

mid = filter(tru_ot_mvc, diagnostic == 'ordered_exploitation' & gen == 40000 & T != 'ran')
mid$Generation <- factor(mid$gen)

mvc_p = ggplot(mid, aes(x = T, y=pop_fit_max / DIMENSIONALITY, group = T, shape = Generation)) +
  geom_point(col = mvc_col[1] , position = position_jitternudge(jitter.width = .03, nudge.x = -0.05), size = 2, alpha = 1.0) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0), lwd = 0.7, col = mvc_col[1], fill = mvc_col[1], width = .1, outlier.shape = NA, alpha = 0.0) +

  geom_point(data = end, aes(x = T, y=pop_fit_max / DIMENSIONALITY), col = mvc_col[2], position = position_jitternudge(jitter.width = .03, nudge.x = 0.05), size = 2, alpha = 1.0) +
  geom_boxplot(data = end, aes(x = T, y=pop_fit_max / DIMENSIONALITY), position = position_nudge(x = .15, y = 0), lwd = 0.7, col = mvc_col[2], fill = mvc_col[2], width = .1, outlier.shape = NA, alpha = 0.0) +

  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=c(0,1))+
  scale_colour_manual(values = c(mvc_col[1],mvc_col[2])) +
  p_theme

plot_grid(
  mvc_p +
    ggtitle("Performance comparisons") +
    theme(legend.position="none"),
  legend,
  nrow=2,
  rel_heights = c(1,.05),
  label_size = TSIZE
)

6.2.3.3 Stats

Summary statistics for the performance of the best performance at 40,000 and 50,000 generations.

slices = filter(tru_ot_mvc, diagnostic == 'ordered_exploitation' & (gen == 50000 | gen == 40000))
slices$Generation <- factor(slices$gen, levels = c(50000,40000))
slices %>%
  group_by(T, Generation) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(pop_fit_max  / DIMENSIONALITY)),
    min = min(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    median = median(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    mean = mean(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    max = max(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    IQR = IQR(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

## # A tibble: 18 x 9
## # Groups:   T [9]
##    T     Generation count na_cnt   min median  mean   max    IQR
##    <fct> <fct>      <int>  <int> <dbl>  <dbl> <dbl> <dbl>  <dbl>
##  1 1     50000         50      0  7.43   8.37  8.34  8.50 0.108 
##  2 1     40000         50      0  7.42   8.37  8.33  8.50 0.113 
##  3 2     50000         50      0  5.58   8.36  8.23  8.54 0.108 
##  4 2     40000         50      0  5.57   8.36  8.21  8.53 0.104 
##  5 4     50000         50      0  5.94   8.35  8.19  8.52 0.102 
##  6 4     40000         50      0  5.94   8.33  8.18  8.51 0.107 
##  7 8     50000         50      0  6.01   8.35  8.19  8.65 0.0922
##  8 8     40000         50      0  6.01   8.33  8.17  8.63 0.112 
##  9 16    50000         50      0  5.45   8.34  8.08  8.59 0.260 
## 10 16    40000         50      0  5.45   8.33  8.05  8.56 0.244 
## 11 32    50000         50      0  5.20   8.33  8.02  8.56 0.553 
## 12 32    40000         50      0  5.20   8.31  8.00  8.55 0.551 
## 13 64    50000         50      0  3.51   7.87  7.61  8.57 1.24  
## 14 64    40000         50      0  3.49   7.86  7.58  8.55 1.23  
## 15 128   50000         50      0  5.34   7.60  7.33  8.55 1.38  
## 16 128   40000         50      0  5.32   7.54  7.29  8.49 1.37  
## 17 256   50000         50      0  4.64   6.25  6.26  7.41 0.973 
## 18 256   40000         50      0  4.58   6.20  6.21  7.29 0.963

T 2

wilcox.test(x = filter(slices, T == 2 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 2 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 2 & Generation == 50000)$pop_fit_max and filter(slices, T == 2 & Generation == 40000)$pop_fit_max
## W = 1359, p-value = 0.4545
## alternative hypothesis: true location shift is not equal to 0

T 4

wilcox.test(x = filter(slices, T == 4 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 4 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 4 & Generation == 50000)$pop_fit_max and filter(slices, T == 4 & Generation == 40000)$pop_fit_max
## W = 1355, p-value = 0.4713
## alternative hypothesis: true location shift is not equal to 0

T 8

wilcox.test(x = filter(slices, T == 8 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 8 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 8 & Generation == 50000)$pop_fit_max and filter(slices, T == 8 & Generation == 40000)$pop_fit_max
## W = 1375, p-value = 0.3907
## alternative hypothesis: true location shift is not equal to 0

T 16

wilcox.test(x = filter(slices, T == 16 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 16 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 16 & Generation == 50000)$pop_fit_max and filter(slices, T == 16 & Generation == 40000)$pop_fit_max
## W = 1367, p-value = 0.4219
## alternative hypothesis: true location shift is not equal to 0

T 32

wilcox.test(x = filter(slices, T == 32 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 32 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 32 & Generation == 50000)$pop_fit_max and filter(slices, T == 32 & Generation == 40000)$pop_fit_max
## W = 1320, p-value = 0.6319
## alternative hypothesis: true location shift is not equal to 0

T 64

wilcox.test(x = filter(slices, T == 64 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 64 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 64 & Generation == 50000)$pop_fit_max and filter(slices, T == 64 & Generation == 40000)$pop_fit_max
## W = 1319, p-value = 0.6368
## alternative hypothesis: true location shift is not equal to 0

T 128

wilcox.test(x = filter(slices, T == 128 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 128 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 128 & Generation == 50000)$pop_fit_max and filter(slices, T == 128 & Generation == 40000)$pop_fit_max
## W = 1311, p-value = 0.6766
## alternative hypothesis: true location shift is not equal to 0

T 256

wilcox.test(x = filter(slices, T == 256 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 256 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 256 & Generation == 50000)$pop_fit_max and filter(slices, T == 256 & Generation == 40000)$pop_fit_max
## W = 1321, p-value = 0.627
## alternative hypothesis: true location shift is not equal to 0

6.3 Contraditory objectives diagnostic

Here we present the results for satisfactory trait coverage and activation gene coverage found by each truncation selection size value replicate on the ordered exploitation diagnostic. Satisfactory trait coverage refers to the count of unique satisfied traits in the population, while activation gene coverage refers to the count of unique activation genes in the population. Note that both coverage values fall between 0 and 100.

6.3.1 Satisfactory trait coverage

Satisfactory trait coverage analysis.

6.3.1.1 Coverage over time

Satisfactory trait coverage over time.

lines = filter(tru_ot, diagnostic == 'contradictory_objectives') %>%
        group_by(T, gen) %>%
          dplyr::summarise(
            min = min(pop_uni_obj),
            mean = mean(pop_uni_obj),
            max = max(pop_uni_obj)
          )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Satisfactory trait coverage over time") +
  p_theme

6.3.1.2 Best coverage throughout

Best satisfactory trait coverage throughout 50,000 generations.

filter(tru_best, col == 'pop_uni_obj' & diagnostic == 'contradictory_objectives') %>%
ggplot(., aes(x = T, y = val, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Best satisfactory trait coverage") +
  p_theme

6.3.1.2.1 Stats

Summary statistics for the best satisfactory trait coverage throughout 50,000 generations.

coverage = filter(tru_best, col == 'pop_uni_obj' & diagnostic == 'contradictory_objectives')
group_by(coverage, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(val)),
    min = min(val, na.rm = TRUE),
    median = median(val, na.rm = TRUE),
    mean = mean(val, na.rm = TRUE),
    max = max(val, na.rm = TRUE),
    IQR = IQR(val, na.rm = TRUE)
  )

## # A tibble: 9 x 8
##   T     count na_cnt   min median  mean   max   IQR
##   <fct> <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 1        50      0     1      1     1     1     0
## 2 2        50      0     1      1     1     1     0
## 3 4        50      0     1      1     1     1     0
## 4 8        50      0     1      1     1     1     0
## 5 16       50      0     1      1     1     1     0
## 6 32       50      0     1      1     1     1     0
## 7 64       50      0     1      1     1     1     0
## 8 128      50      0     1      1     1     1     0
## 9 256      50      0     1      1     1     1     0

6.3.1.3 End of 50,000 generations

Satisfactory trait coverage in the population at the end of 50,000 generations.

filter(tru_ot, diagnostic == 'contradictory_objectives' & gen == 50000) %>%
ggplot(., aes(x = T, y = pop_uni_obj, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Final satisfactory trait coverage") +
  p_theme

6.3.1.3.1 Stats

Summary statistics for satisfactory trait coverage in the population at the end of 50,000 generations.

coverage = filter(tru_ot, diagnostic == 'contradictory_objectives' & gen == 50000)
group_by(coverage, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(pop_uni_obj)),
    min = min(pop_uni_obj, na.rm = TRUE),
    median = median(pop_uni_obj, na.rm = TRUE),
    mean = mean(pop_uni_obj, na.rm = TRUE),
    max = max(pop_uni_obj, na.rm = TRUE),
    IQR = IQR(pop_uni_obj, na.rm = TRUE)
  )

## # A tibble: 9 x 8
##   T     count na_cnt   min median  mean   max   IQR
##   <fct> <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 1        50      0     1      1     1     1     0
## 2 2        50      0     1      1     1     1     0
## 3 4        50      0     1      1     1     1     0
## 4 8        50      0     1      1     1     1     0
## 5 16       50      0     1      1     1     1     0
## 6 32       50      0     1      1     1     1     0
## 7 64       50      0     1      1     1     1     0
## 8 128      50      0     1      1     1     1     0
## 9 256      50      0     1      1     1     1     0

6.3.2 Activation gene coverage

Here we analyze the activation gene coverage for each parameter replicate on the contradictory objectives diagnostic.

6.3.2.1 Coverage over time

Activation gene coverage over time.

lines = filter(tru_ot, diagnostic == 'contradictory_objectives') %>%
        group_by(T, gen) %>%
          dplyr::summarise(
            min = min(uni_str_pos),
            mean = mean(uni_str_pos),
            max = max(uni_str_pos)
          )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Coverage",
    limits=c(-1, 101),
    breaks=seq(0,100, 20),
    labels=c("0", "20", "40", "60", "80", "100")
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Activation gene coverage over time") +
  p_theme

6.3.2.2 End of 50,000 generations

Activation gene coverage in the population at the end of 50,000 generations.

filter(tru_ot, diagnostic == 'contradictory_objectives' & gen == 50000) %>%
ggplot(., aes(x = T, y = uni_str_pos, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Final activation gene coverage") +
  p_theme

6.3.2.2.1 Stats

Summary statistics for activation gene coverage in the population at the end of 50,000 generations.

coverage = filter(tru_ot, diagnostic == 'contradictory_objectives' & gen == 50000)
group_by(coverage, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(uni_str_pos)),
    min = min(uni_str_pos, na.rm = TRUE),
    median = median(uni_str_pos, na.rm = TRUE),
    mean = mean(uni_str_pos, na.rm = TRUE),
    max = max(uni_str_pos, na.rm = TRUE),
    IQR = IQR(uni_str_pos, na.rm = TRUE)
  )

## # A tibble: 9 x 8
##   T     count na_cnt   min median  mean   max   IQR
##   <fct> <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 1        50      0     1      1     1     1     0
## 2 2        50      0     1      1     1     1     0
## 3 4        50      0     1      1     1     1     0
## 4 8        50      0     1      1     1     1     0
## 5 16       50      0     1      1     1     1     0
## 6 32       50      0     1      1     1     1     0
## 7 64       50      0     1      1     1     1     0
## 8 128      50      0     1      1     1     1     0
## 9 256      50      0     1      1     1     1     0

6.3.3 Multi-valley crossing

6.3.3.1 Satisfactory trait coverage over time

lines = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives') %>%
  group_by(T, gen) %>%
  dplyr::summarise(
    min = min(pop_uni_obj),
    mean = mean(pop_uni_obj),
    max = max(pop_uni_obj)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Satisfactory trait coverage over time") +
  p_theme

6.3.3.2 Satisfactory trait coverage comparison

Best performances in the population at 40,000 and 50,000 generations.

# 80% and final generation comparison
end = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives' & gen == 50000)
end$Generation <- factor(end$gen)

mid = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives' & gen == 40000)
mid$Generation <- factor(mid$gen)

mvc_p = ggplot(mid, aes(x = T, y=pop_uni_obj, group = T, shape = Generation)) +
  geom_point(col = mvc_col[1] , position = position_jitternudge(jitter.width = .03, nudge.x = -0.05), size = 2, alpha = 1.0) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0), lwd = 0.7, col = mvc_col[1], fill = mvc_col[1], width = .1, outlier.shape = NA, alpha = 0.0) +

  geom_point(data = end, aes(x = T, y=pop_uni_obj), col = mvc_col[2], position = position_jitternudge(jitter.width = .03, nudge.x = 0.05), size = 2, alpha = 1.0) +
  geom_boxplot(data = end, aes(x = T, y=pop_uni_obj), position = position_nudge(x = .15, y = 0), lwd = 0.7, col = mvc_col[2], fill = mvc_col[2], width = .1, outlier.shape = NA, alpha = 0.0) +

  scale_y_continuous(
    name="Coverage",
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=c(0,1))+
  scale_colour_manual(values = c(mvc_col[1],mvc_col[2])) +
  p_theme

plot_grid(
  mvc_p +
    ggtitle("Satisfactory trait coverage") +
    theme(legend.position="none"),
  legend,
  nrow=2,
  rel_heights = c(1,.05),
  label_size = TSIZE
)

6.3.3.2.1 Stats

Summary statistics for the performance of the best performance at 40,000 and 50,000 generations.

slices = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives' & (gen == 50000 | gen == 40000))
slices$Generation <- factor(slices$gen, levels = c(50000,40000))
slices %>%
  group_by(T, Generation) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(pop_uni_obj)),
    min = min(pop_uni_obj, na.rm = TRUE),
    median = median(pop_uni_obj, na.rm = TRUE),
    mean = mean(pop_uni_obj, na.rm = TRUE),
    max = max(pop_uni_obj, na.rm = TRUE),
    IQR = IQR(pop_uni_obj, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

## # A tibble: 18 x 9
## # Groups:   T [9]
##    T     Generation count na_cnt   min median  mean   max   IQR
##    <fct> <fct>      <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
##  1 1     50000         50      0     0      0     0     0     0
##  2 1     40000         50      0     0      0     0     0     0
##  3 2     50000         50      0     0      0     0     0     0
##  4 2     40000         50      0     0      0     0     0     0
##  5 4     50000         50      0     0      0     0     0     0
##  6 4     40000         50      0     0      0     0     0     0
##  7 8     50000         50      0     0      0     0     0     0
##  8 8     40000         50      0     0      0     0     0     0
##  9 16    50000         50      0     0      0     0     0     0
## 10 16    40000         50      0     0      0     0     0     0
## 11 32    50000         50      0     0      0     0     0     0
## 12 32    40000         50      0     0      0     0     0     0
## 13 64    50000         50      0     0      0     0     0     0
## 14 64    40000         50      0     0      0     0     0     0
## 15 128   50000         50      0     0      0     0     0     0
## 16 128   40000         50      0     0      0     0     0     0
## 17 256   50000         50      0     0      0     0     0     0
## 18 256   40000         50      0     0      0     0     0     0

T 2

wilcox.test(x = filter(slices, T == 2 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 2 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 2 & Generation == 50000)$pop_uni_obj and filter(slices, T == 2 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 4

wilcox.test(x = filter(slices, T == 4 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 4 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 4 & Generation == 50000)$pop_uni_obj and filter(slices, T == 4 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 8

wilcox.test(x = filter(slices, T == 8 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 8 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 8 & Generation == 50000)$pop_uni_obj and filter(slices, T == 8 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 16

wilcox.test(x = filter(slices, T == 16 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 16 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 16 & Generation == 50000)$pop_uni_obj and filter(slices, T == 16 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 32

wilcox.test(x = filter(slices, T == 32 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 32 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 32 & Generation == 50000)$pop_uni_obj and filter(slices, T == 32 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 64

wilcox.test(x = filter(slices, T == 64 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 64 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 64 & Generation == 50000)$pop_uni_obj and filter(slices, T == 64 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 128

wilcox.test(x = filter(slices, T == 128 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 128 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 128 & Generation == 50000)$pop_uni_obj and filter(slices, T == 128 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

T 256

wilcox.test(x = filter(slices, T == 256 & Generation == 50000)$pop_uni_obj,
            y = filter(slices, T == 256 & Generation == 40000)$pop_uni_obj,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 256 & Generation == 50000)$pop_uni_obj and filter(slices, T == 256 & Generation == 40000)$pop_uni_obj
## W = 1250, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

6.3.3.3 Activation gene coverage over time

lines = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives') %>%
  group_by(T, gen) %>%
  dplyr::summarise(
    min = min(uni_str_pos),
    mean = mean(uni_str_pos),
    max = max(uni_str_pos)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Activation gene coverage over time") +
  p_theme

6.3.3.4 Activation gene coverage comparison

Activation gene coverage in the population at 40,000 and 50,000 generations.

# 80% and final generation comparison
end = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives' & gen == 50000)
end$Generation <- factor(end$gen)

mid = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives' & gen == 40000)
mid$Generation <- factor(mid$gen)

mvc_p = ggplot(mid, aes(x = T, y=uni_str_pos, group = T, shape = Generation)) +
  geom_point(col = mvc_col[1] , position = position_jitternudge(jitter.width = .03, nudge.x = -0.05), size = 2, alpha = 1.0) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0), lwd = 0.7, col = mvc_col[1], fill = mvc_col[1], width = .1, outlier.shape = NA, alpha = 0.0) +

  geom_point(data = end, aes(x = T, y=uni_str_pos), col = mvc_col[2], position = position_jitternudge(jitter.width = .03, nudge.x = 0.05), size = 2, alpha = 1.0) +
  geom_boxplot(data = end, aes(x = T, y=uni_str_pos), position = position_nudge(x = .15, y = 0), lwd = 0.7, col = mvc_col[2], fill = mvc_col[2], width = .1, outlier.shape = NA, alpha = 0.0) +

  scale_y_continuous(
    name="Coverage",
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=c(0,1))+
  scale_colour_manual(values = c(mvc_col[1],mvc_col[2])) +
  p_theme

plot_grid(
  mvc_p +
    ggtitle("Activation gene coverage") +
    theme(legend.position="none"),
  legend,
  nrow=2,
  rel_heights = c(1,.05),
  label_size = TSIZE
)

6.3.3.4.1 Stats

Summary statistics for the activation gene coverage at 40,000 and 50,000 generations.

slices = filter(tru_ot_mvc, diagnostic == 'contradictory_objectives' & (gen == 50000 | gen == 40000))
slices$Generation <- factor(slices$gen, levels = c(50000,40000))
slices %>%
  group_by(T, Generation) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(uni_str_pos)),
    min = min(uni_str_pos, na.rm = TRUE),
    median = median(uni_str_pos, na.rm = TRUE),
    mean = mean(uni_str_pos, na.rm = TRUE),
    max = max(uni_str_pos, na.rm = TRUE),
    IQR = IQR(uni_str_pos, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

## # A tibble: 18 x 9
## # Groups:   T [9]
##    T     Generation count na_cnt   min median  mean   max   IQR
##    <fct> <fct>      <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
##  1 1     50000         50      0     1      5  5.68    11  2.75
##  2 1     40000         50      0     2      5  5.54    10  3   
##  3 2     50000         50      0     2      6  5.78    10  3.5 
##  4 2     40000         50      0     1      6  5.64    10  3   
##  5 4     50000         50      0     2      6  6.08    13  3   
##  6 4     40000         50      0     2      6  6.12    14  2.75
##  7 8     50000         50      0     2      5  5.6     11  3   
##  8 8     40000         50      0     2      6  5.8     11  3   
##  9 16    50000         50      0     1      6  6.04    11  2   
## 10 16    40000         50      0     1      5  5.42    11  3   
## 11 32    50000         50      0     1      6  5.94    11  3   
## 12 32    40000         50      0     2      5  5.58    10  3   
## 13 64    50000         50      0     1      5  5.14    11  3.75
## 14 64    40000         50      0     2      5  5.66    11  3   
## 15 128   50000         50      0     1      5  5.1     10  2.75
## 16 128   40000         50      0     1      5  4.78    10  2.75
## 17 256   50000         50      0     2      5  5.12    11  2   
## 18 256   40000         50      0     2      4  4.58     9  1.75

T 2

wilcox.test(x = filter(slices, T == 2 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 2 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 2 & Generation == 50000)$uni_str_pos and filter(slices, T == 2 & Generation == 40000)$uni_str_pos
## W = 1280.5, p-value = 0.8346
## alternative hypothesis: true location shift is not equal to 0

T 4

wilcox.test(x = filter(slices, T == 4 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 4 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 4 & Generation == 50000)$uni_str_pos and filter(slices, T == 4 & Generation == 40000)$uni_str_pos
## W = 1235, p-value = 0.9196
## alternative hypothesis: true location shift is not equal to 0

T 8

wilcox.test(x = filter(slices, T == 8 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 8 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 8 & Generation == 50000)$uni_str_pos and filter(slices, T == 8 & Generation == 40000)$uni_str_pos
## W = 1175, p-value = 0.6039
## alternative hypothesis: true location shift is not equal to 0

T 16

wilcox.test(x = filter(slices, T == 16 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 16 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 16 & Generation == 50000)$uni_str_pos and filter(slices, T == 16 & Generation == 40000)$uni_str_pos
## W = 1489.5, p-value = 0.09394
## alternative hypothesis: true location shift is not equal to 0

T 32

wilcox.test(x = filter(slices, T == 32 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 32 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 32 & Generation == 50000)$uni_str_pos and filter(slices, T == 32 & Generation == 40000)$uni_str_pos
## W = 1363, p-value = 0.4333
## alternative hypothesis: true location shift is not equal to 0

T 64

wilcox.test(x = filter(slices, T == 64 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 64 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 64 & Generation == 50000)$uni_str_pos and filter(slices, T == 64 & Generation == 40000)$uni_str_pos
## W = 1091, p-value = 0.2703
## alternative hypothesis: true location shift is not equal to 0

T 128

wilcox.test(x = filter(slices, T == 128 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 128 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 128 & Generation == 50000)$uni_str_pos and filter(slices, T == 128 & Generation == 40000)$uni_str_pos
## W = 1335.5, p-value = 0.552
## alternative hypothesis: true location shift is not equal to 0

T 256

wilcox.test(x = filter(slices, T == 256 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 256 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 256 & Generation == 50000)$uni_str_pos and filter(slices, T == 256 & Generation == 40000)$uni_str_pos
## W = 1439, p-value = 0.1846
## alternative hypothesis: true location shift is not equal to 0

6.4 Multi-path exploration results

Here we present the results for best performances and activation gene coverage found by each truncation selection size value replicate on the multi-path exploration diagnostic. Best performance found refers to the largest average trait score found in a given population, while activation gene coverage refers to the count of unique activation genes in the population. Note that both values fall between 0 and 100.

6.4.1 Performance

Here we analyze the performances for each parameter replicate on the multi-path exploration diagnostic.

6.4.1.1 Performance over time

Performance over time.

lines = filter(tru_ot, diagnostic == 'multipath_exploration') %>%
        group_by(T, gen) %>%
          dplyr::summarise(
            min = min(pop_fit_max),
            mean = mean(pop_fit_max),
            max = max(pop_fit_max)
          )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean / DIMENSIONALITY, group = T, fill = T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min / DIMENSIONALITY, ymax = max / DIMENSIONALITY), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Average trait score",
    limits=c(-1, 101),
    breaks=seq(0,100, 20),
    labels=c("0", "20", "40", "60", "80", "100")
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Best performance over time") +
  p_theme

6.4.1.2 Best performance throughout

Here we plot the performance of the best performing solution found throughout 50,000 generations.

filter(tru_best, col == 'pop_fit_max' & diagnostic == 'multipath_exploration') %>%
ggplot(., aes(x = T, y = val / DIMENSIONALITY, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(
    name="Average trait score",
    limits=c(-1, 101),
    breaks=seq(0,100, 20),
    labels=c("0", "20", "40", "60", "80", "100")
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Best performance throughout") +
  p_theme

6.4.1.2.1 Stats

Summary statistics for the performance of the best performing solution found throughout 50,000 generations.

performance = filter(tru_best, col == 'pop_fit_max' & diagnostic == 'multipath_exploration')
group_by(performance, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(val)),
    min = min(val / DIMENSIONALITY, na.rm = TRUE),
    median = median(val / DIMENSIONALITY, na.rm = TRUE),
    mean = mean(val / DIMENSIONALITY, na.rm = TRUE),
    max = max(val / DIMENSIONALITY, na.rm = TRUE),
    IQR = IQR(val / DIMENSIONALITY, na.rm = TRUE)
  )

## # A tibble: 9 x 8
##   T     count na_cnt   min median  mean   max   IQR
##   <fct> <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 1        50      0 11      57.0  51.4  91.0  33.7
## 2 2        50      0  6      46.5  49.3 100.   53.2
## 3 4        50      0  7.00   46.0  50.4 100.   48.7
## 4 8        50      0  5      44.0  46.1 100.   49.7
## 5 16       50      0  6      53.5  54.6  99.0  53.2
## 6 32       50      0  5      52.5  50.4  99.0  47.7
## 7 64       50      0  8.00   52.5  51.1  99.9  41.5
## 8 128      50      0  7      50.0  51.8  99.9  49.0
## 9 256      50      0  4      54.5  52.7  96.3  49.1

Kruskal–Wallis test provides evidence of no statistical differences among the best performing solution found throughout 50,000 generations.

kruskal.test(val ~ T,data = performance)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  val by T
## Kruskal-Wallis chi-squared = 2.7539, df = 8, p-value = 0.9488

6.4.1.3 End of 50,000 generations

Best performance in the population at the end of 50,000 generations.

filter(tru_ot, diagnostic == 'multipath_exploration' & gen == 50000) %>%
ggplot(., aes(x = T, y = pop_fit_max / DIMENSIONALITY, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(
    name="Average trait score",
    limits=c(-1, 101),
    breaks=seq(0,100, 20),
    labels=c("0", "20", "40", "60", "80", "100")
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Final performance") +
  p_theme

6.4.1.3.1 Stats

Summary statistics for the best performance in the population at the end of 50,000 generations.

performance = filter(tru_ot, diagnostic == 'multipath_exploration' & gen == 50000)
group_by(performance, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(pop_fit_max / DIMENSIONALITY)),
    min = min(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
    median = median(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
    mean = mean(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
    max = max(pop_fit_max / DIMENSIONALITY, na.rm = TRUE),
    IQR = IQR(pop_fit_max / DIMENSIONALITY, na.rm = TRUE)
  )

## # A tibble: 9 x 8
##   T     count na_cnt   min median  mean   max   IQR
##   <fct> <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 1        50      0 11      57.0  51.4  91.0  33.7
## 2 2        50      0  6      46.5  49.3 100.   53.2
## 3 4        50      0  7.00   46.0  50.4 100.   48.7
## 4 8        50      0  5      44.0  46.1 100.   49.7
## 5 16       50      0  6      53.5  54.6  99.0  53.2
## 6 32       50      0  5      52.5  50.4  99.0  47.7
## 7 64       50      0  8.00   52.5  51.1  99.9  41.5
## 8 128      50      0  7      50.0  51.8  99.9  49.0
## 9 256      50      0  4      54.5  52.7  96.3  49.1

Kruskal–Wallis test provides evidence of no statistical differences among best performance in the population at the end of 50,000 generations.

kruskal.test(pop_fit_max ~ T, data = performance)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_fit_max by T
## Kruskal-Wallis chi-squared = 2.7539, df = 8, p-value = 0.9488

6.4.2 Activation gene coverage

Here we analyze the activation gene coverage for each parameter replicate on the multi-path exploration diagnostic.

6.4.2.1 Coverage over time

Activation gene coverage over time.

lines = filter(tru_ot, diagnostic == 'multipath_exploration') %>%
        group_by(T, gen) %>%
          dplyr::summarise(
            min = min(uni_str_pos),
            mean = mean(uni_str_pos),
            max = max(uni_str_pos)
          )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Coverage",
    limits=c(-1, 101),
    breaks=seq(0,100, 20),
    labels=c("0", "20", "40", "60", "80", "100")
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Activation gene coverage over time") +
  p_theme

6.4.2.2 End of 50,000 generations

Activation gene coverage in the population at the end of 50,000 generations.

filter(tru_ot, diagnostic == 'multipath_exploration' & gen == 50000) %>%
ggplot(., aes(x = T, y = uni_str_pos, color = T, fill = T, shape = T)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), scale = 'width', alpha = 0.2) +
  geom_point(position = position_jitter(width = .1), size = 1.5, alpha = 1.0) +
  geom_boxplot(color = 'black', width = .2, outlier.shape = NA, alpha = 0.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Final activation gene coverage") +
  p_theme

6.4.2.2.1 Stats

Summary statistics for the activation gene coverage in the population at the end of 50,000 generations.

coverage = filter(tru_ot, diagnostic == 'multipath_exploration' & gen == 50000)
group_by(coverage, T) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(uni_str_pos)),
    min = min(uni_str_pos, na.rm = TRUE),
    median = median(uni_str_pos, na.rm = TRUE),
    mean = mean(uni_str_pos, na.rm = TRUE),
    max = max(uni_str_pos, na.rm = TRUE),
    IQR = IQR(uni_str_pos, na.rm = TRUE)
  )

## # A tibble: 9 x 8
##   T     count na_cnt   min median  mean   max   IQR
##   <fct> <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
## 1 1        50      0     1      2  1.92     2     0
## 2 2        50      0     1      2  2        3     0
## 3 4        50      0     1      2  2.04     3     0
## 4 8        50      0     2      2  2.02     3     0
## 5 16       50      0     1      2  2        3     0
## 6 32       50      0     2      2  2.02     3     0
## 7 64       50      0     1      2  2.02     3     0
## 8 128      50      0     1      2  1.98     3     0
## 9 256      50      0     1      2  2.34     7     0

Kruskal–Wallis test provides evidence of statistical differences among activation gene coverage in the population at the end of 50,000 generations.

kruskal.test(uni_str_pos ~ T, data = coverage)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  uni_str_pos by T
## Kruskal-Wallis chi-squared = 20.807, df = 8, p-value = 0.007679

Results for post-hoc Wilcoxon rank-sum test with a Bonferroni correction on the activation gene coverage in the population at the end of 50,000 generations.

pairwise.wilcox.test(x = coverage$uni_str_pos, g = coverage$T , p.adjust.method = "bonferroni",
                     paired = FALSE, conf.int = FALSE, alternative = 't')

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  coverage$uni_str_pos and coverage$T 
## 
##     1     2     4     8     16    32    64    128  
## 2   1.000 -     -     -     -     -     -     -    
## 4   1.000 1.000 -     -     -     -     -     -    
## 8   0.911 1.000 1.000 -     -     -     -     -    
## 16  1.000 1.000 1.000 1.000 -     -     -     -    
## 32  0.911 1.000 1.000 1.000 1.000 -     -     -    
## 64  1.000 1.000 1.000 1.000 1.000 1.000 -     -    
## 128 1.000 1.000 1.000 1.000 1.000 1.000 1.000 -    
## 256 0.047 0.915 1.000 0.887 0.569 0.887 1.000 0.866
## 
## P value adjustment method: bonferroni

6.4.3 Multi-valley crossing

6.4.3.1 Performance over time

# data for lines and shading on plots
lines = filter(tru_ot_mvc, diagnostic == 'multipath_exploration') %>%
  group_by(T, gen) %>%
  dplyr::summarise(
    min = min(pop_fit_max) / DIMENSIONALITY,
    mean = mean(pop_fit_max) / DIMENSIONALITY,
    max = max(pop_fit_max) / DIMENSIONALITY
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle('Performance over time')+
  p_theme +
  guides(
    shape=guide_legend(nrow=2, title.position = "left"),
    color=guide_legend(nrow=2, title.position = "left"),
    fill=guide_legend(nrow=2, title.position = "left")
  )

6.4.3.2 Performance comparison

Best performances in the population at 40,000 and 50,000 generations.

# 80% and final generation comparison
end = filter(tru_ot_mvc, diagnostic == 'multipath_exploration' & gen == 50000 & T != 'ran')
end$Generation <- factor(end$gen)

mid = filter(tru_ot_mvc, diagnostic == 'multipath_exploration' & gen == 40000 & T != 'ran')
mid$Generation <- factor(mid$gen)

mvc_p = ggplot(mid, aes(x = T, y=pop_fit_max / DIMENSIONALITY, group = T, shape = Generation)) +
  geom_point(col = mvc_col[1] , position = position_jitternudge(jitter.width = .03, nudge.x = -0.05), size = 2, alpha = 1.0) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0), lwd = 0.7, col = mvc_col[1], fill = mvc_col[1], width = .1, outlier.shape = NA, alpha = 0.0) +

  geom_point(data = end, aes(x = T, y=pop_fit_max / DIMENSIONALITY), col = mvc_col[2], position = position_jitternudge(jitter.width = .03, nudge.x = 0.05), size = 2, alpha = 1.0) +
  geom_boxplot(data = end, aes(x = T, y=pop_fit_max / DIMENSIONALITY), position = position_nudge(x = .15, y = 0), lwd = 0.7, col = mvc_col[2], fill = mvc_col[2], width = .1, outlier.shape = NA, alpha = 0.0) +

  scale_y_continuous(
    name="Average trait score"
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=c(0,1))+
  scale_colour_manual(values = c(mvc_col[1],mvc_col[2])) +
  p_theme

plot_grid(
  mvc_p +
    ggtitle("Performance comparisons") +
    theme(legend.position="none"),
  legend,
  nrow=2,
  rel_heights = c(1,.05),
  label_size = TSIZE
)

6.4.3.3 Stats

Summary statistics for the performance of the best performance at 40,000 and 50,000 generations.

slices = filter(tru_ot_mvc, diagnostic == 'multipath_exploration' & (gen == 50000 | gen == 40000))
slices$Generation <- factor(slices$gen, levels = c(50000,40000))
slices %>%
  group_by(T, Generation) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(pop_fit_max  / DIMENSIONALITY)),
    min = min(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    median = median(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    mean = mean(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    max = max(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE),
    IQR = IQR(pop_fit_max  / DIMENSIONALITY, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

## # A tibble: 18 x 9
## # Groups:   T [9]
##    T     Generation count na_cnt   min median  mean   max   IQR
##    <fct> <fct>      <int>  <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 1     50000         50      0 0.720   4.47  4.68  8.75  4.57
##  2 1     40000         50      0 0.720   4.29  4.58  8.75  4.67
##  3 2     50000         50      0 1.28    5.23  5.02  8.78  3.71
##  4 2     40000         50      0 0.740   5.14  4.94  8.77  3.54
##  5 4     50000         50      0 1.41    5.48  5.48  8.87  3.13
##  6 4     40000         50      0 1.41    5.35  5.37  8.87  3.17
##  7 8     50000         50      0 1.52    4.83  4.96  8.43  3.76
##  8 8     40000         50      0 1.52    4.83  4.85  8.42  4.12
##  9 16    50000         50      0 1.17    5.92  5.44  8.60  3.13
## 10 16    40000         50      0 1.17    5.85  5.33  8.42  3.05
## 11 32    50000         50      0 1.45    5.35  4.99  8.33  3.69
## 12 32    40000         50      0 1.45    5.19  4.90  8.32  3.92
## 13 64    50000         50      0 1.03    5.02  4.87  8.67  3.42
## 14 64    40000         50      0 0.940   4.93  4.75  8.66  3.22
## 15 128   50000         50      0 1.18    5.37  5.10  9.14  3.49
## 16 128   40000         50      0 1.18    5.17  4.95  9.12  3.62
## 17 256   50000         50      0 1.27    4.69  4.95  8.54  3.53
## 18 256   40000         50      0 1.27    4.65  4.80  8.24  3.24

T 2

wilcox.test(x = filter(slices, T == 2 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 2 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 2 & Generation == 50000)$pop_fit_max and filter(slices, T == 2 & Generation == 40000)$pop_fit_max
## W = 1300, p-value = 0.7329
## alternative hypothesis: true location shift is not equal to 0

T 4

wilcox.test(x = filter(slices, T == 4 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 4 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 4 & Generation == 50000)$pop_fit_max and filter(slices, T == 4 & Generation == 40000)$pop_fit_max
## W = 1307.5, p-value = 0.6944
## alternative hypothesis: true location shift is not equal to 0

T 8

wilcox.test(x = filter(slices, T == 8 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 8 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 8 & Generation == 50000)$pop_fit_max and filter(slices, T == 8 & Generation == 40000)$pop_fit_max
## W = 1317, p-value = 0.6466
## alternative hypothesis: true location shift is not equal to 0

T 16

wilcox.test(x = filter(slices, T == 16 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 16 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 16 & Generation == 50000)$pop_fit_max and filter(slices, T == 16 & Generation == 40000)$pop_fit_max
## W = 1320, p-value = 0.6319
## alternative hypothesis: true location shift is not equal to 0

T 32

wilcox.test(x = filter(slices, T == 32 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 32 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 32 & Generation == 50000)$pop_fit_max and filter(slices, T == 32 & Generation == 40000)$pop_fit_max
## W = 1298.5, p-value = 0.7407
## alternative hypothesis: true location shift is not equal to 0

T 64

wilcox.test(x = filter(slices, T == 64 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 64 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 64 & Generation == 50000)$pop_fit_max and filter(slices, T == 64 & Generation == 40000)$pop_fit_max
## W = 1306, p-value = 0.702
## alternative hypothesis: true location shift is not equal to 0

T 128

wilcox.test(x = filter(slices, T == 128 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 128 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 128 & Generation == 50000)$pop_fit_max and filter(slices, T == 128 & Generation == 40000)$pop_fit_max
## W = 1315.5, p-value = 0.6541
## alternative hypothesis: true location shift is not equal to 0

T 256

wilcox.test(x = filter(slices, T == 256 & Generation == 50000)$pop_fit_max,
            y = filter(slices, T == 256 & Generation == 40000)$pop_fit_max,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 256 & Generation == 50000)$pop_fit_max and filter(slices, T == 256 & Generation == 40000)$pop_fit_max
## W = 1328.5, p-value = 0.5908
## alternative hypothesis: true location shift is not equal to 0

6.4.3.4 Activation gene coverage over time

lines = filter(tru_ot_mvc, diagnostic == 'multipath_exploration') %>%
  group_by(T, gen) %>%
  dplyr::summarise(
    min = min(uni_str_pos),
    mean = mean(uni_str_pos),
    max = max(uni_str_pos)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

ggplot(lines, aes(x=gen, y=mean, group = T, fill =T, color = T, shape = T)) +
  geom_ribbon(aes(ymin = min, ymax = max), alpha = 0.1) +
  geom_line(size = 0.5) +
  geom_point(data = filter(lines, gen %% 2000 == 0 & gen != 0), size = 1.5, stroke = 2.0, alpha = 1.0) +
  scale_y_continuous(
    name="Coverage"
  ) +
  scale_x_continuous(
    name="Generations",
    limits=c(0, 50000),
    breaks=c(0, 10000, 20000, 30000, 40000, 50000),
    labels=c("0e+4", "1e+4", "2e+4", "3e+4", "4e+4", "5e+4")

  ) +
  scale_shape_manual(values=SHAPE)+
  scale_colour_manual(values = cb_palette) +
  scale_fill_manual(values = cb_palette) +
  ggtitle("Activation gene coverage over time") +
  p_theme

6.4.3.5 Activation gene coverage comparison

Activation gene coverage in the population at 40,000 and 50,000 generations.

# 80% and final generation comparison
end = filter(tru_ot_mvc, diagnostic == 'multipath_exploration' & gen == 50000)
end$Generation <- factor(end$gen)

mid = filter(tru_ot_mvc, diagnostic == 'multipath_exploration' & gen == 40000)
mid$Generation <- factor(mid$gen)

mvc_p = ggplot(mid, aes(x = T, y=uni_str_pos, group = T, shape = Generation)) +
  geom_point(col = mvc_col[1] , position = position_jitternudge(jitter.width = .03, nudge.x = -0.05), size = 2, alpha = 1.0) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0), lwd = 0.7, col = mvc_col[1], fill = mvc_col[1], width = .1, outlier.shape = NA, alpha = 0.0) +

  geom_point(data = end, aes(x = T, y=uni_str_pos), col = mvc_col[2], position = position_jitternudge(jitter.width = .03, nudge.x = 0.05), size = 2, alpha = 1.0) +
  geom_boxplot(data = end, aes(x = T, y=uni_str_pos), position = position_nudge(x = .15, y = 0), lwd = 0.7, col = mvc_col[2], fill = mvc_col[2], width = .1, outlier.shape = NA, alpha = 0.0) +

  scale_y_continuous(
    name="Coverage",
  ) +
  scale_x_discrete(
    name="T"
  )+
  scale_shape_manual(values=c(0,1))+
  scale_colour_manual(values = c(mvc_col[1],mvc_col[2])) +
  p_theme

plot_grid(
  mvc_p +
    ggtitle("Activation gene coverage") +
    theme(legend.position="none"),
  legend,
  nrow=2,
  rel_heights = c(1,.05),
  label_size = TSIZE
)

6.4.3.5.1 Stats

Summary statistics for the activation gene coverage at 40,000 and 50,000 generations.

slices = filter(tru_ot_mvc, diagnostic == 'multipath_exploration' & (gen == 50000 | gen == 40000))
slices$Generation <- factor(slices$gen, levels = c(50000,40000))
slices %>%
  group_by(T, Generation) %>%
  dplyr::summarise(
    count = n(),
    na_cnt = sum(is.na(uni_str_pos)),
    min = min(uni_str_pos, na.rm = TRUE),
    median = median(uni_str_pos, na.rm = TRUE),
    mean = mean(uni_str_pos, na.rm = TRUE),
    max = max(uni_str_pos, na.rm = TRUE),
    IQR = IQR(uni_str_pos, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'T'. You can override using the `.groups`
## argument.

## # A tibble: 18 x 9
## # Groups:   T [9]
##    T     Generation count na_cnt   min median  mean   max   IQR
##    <fct> <fct>      <int>  <int> <int>  <dbl> <dbl> <int> <dbl>
##  1 1     50000         50      0     1    4.5  4.68    10  3.75
##  2 1     40000         50      0     2    4    4.12     9  2   
##  3 2     50000         50      0     1    5    4.5     13  3   
##  4 2     40000         50      0     2    4    4.24    11  2   
##  5 4     50000         50      0     1    4    4.3     11  2   
##  6 4     40000         50      0     1    3.5  4.06    12  3   
##  7 8     50000         50      0     1    4    4.8     12  3.75
##  8 8     40000         50      0     2    4    4.78    13  4   
##  9 16    50000         50      0     2    4    4.12    11  2.75
## 10 16    40000         50      0     1    3    3.96    11  3   
## 11 32    50000         50      0     1    4.5  4.78    11  3   
## 12 32    40000         50      0     1    3    3.96     9  3.75
## 13 64    50000         50      0     1    4    4.82    15  3   
## 14 64    40000         50      0     1    4    4.12    10  2   
## 15 128   50000         50      0     1    4.5  4.78    11  3   
## 16 128   40000         50      0     1    4    4.52    13  3.75
## 17 256   50000         50      0     1    5    4.82    11  3   
## 18 256   40000         50      0     2    5    4.56    10  2

T 2

wilcox.test(x = filter(slices, T == 2 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 2 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 2 & Generation == 50000)$uni_str_pos and filter(slices, T == 2 & Generation == 40000)$uni_str_pos
## W = 1373, p-value = 0.3915
## alternative hypothesis: true location shift is not equal to 0

T 4

wilcox.test(x = filter(slices, T == 4 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 4 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 4 & Generation == 50000)$uni_str_pos and filter(slices, T == 4 & Generation == 40000)$uni_str_pos
## W = 1354, p-value = 0.4688
## alternative hypothesis: true location shift is not equal to 0

T 8

wilcox.test(x = filter(slices, T == 8 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 8 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 8 & Generation == 50000)$uni_str_pos and filter(slices, T == 8 & Generation == 40000)$uni_str_pos
## W = 1254.5, p-value = 0.9778
## alternative hypothesis: true location shift is not equal to 0

T 16

wilcox.test(x = filter(slices, T == 16 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 16 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 16 & Generation == 50000)$uni_str_pos and filter(slices, T == 16 & Generation == 40000)$uni_str_pos
## W = 1327, p-value = 0.5923
## alternative hypothesis: true location shift is not equal to 0

T 32

wilcox.test(x = filter(slices, T == 32 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 32 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 32 & Generation == 50000)$uni_str_pos and filter(slices, T == 32 & Generation == 40000)$uni_str_pos
## W = 1510.5, p-value = 0.06951
## alternative hypothesis: true location shift is not equal to 0

T 64

wilcox.test(x = filter(slices, T == 64 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 64 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 64 & Generation == 50000)$uni_str_pos and filter(slices, T == 64 & Generation == 40000)$uni_str_pos
## W = 1407, p-value = 0.2749
## alternative hypothesis: true location shift is not equal to 0

T 128

wilcox.test(x = filter(slices, T == 128 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 128 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 128 & Generation == 50000)$uni_str_pos and filter(slices, T == 128 & Generation == 40000)$uni_str_pos
## W = 1348.5, p-value = 0.4941
## alternative hypothesis: true location shift is not equal to 0

T 256

wilcox.test(x = filter(slices, T == 256 & Generation == 50000)$uni_str_pos,
            y = filter(slices, T == 256 & Generation == 40000)$uni_str_pos,
            alternative = 't')

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  filter(slices, T == 256 & Generation == 50000)$uni_str_pos and filter(slices, T == 256 & Generation == 40000)$uni_str_pos
## W = 1308, p-value = 0.6875
## alternative hypothesis: true location shift is not equal to 0