From a26dd0b4bd7430b4dd8137f50646878368b67f78 Mon Sep 17 00:00:00 2001
From: Michael Peters <michael@styx>
Date: Mon, 12 Aug 2024 22:26:43 -0700
Subject: [PATCH] add details on how to produce future generations

---
 src/site/snake/brain-neat.ts | 193 +++++++++++++++++++++--------------
 src/site/snake/ioset.ts      |  99 +++++++++---------
 src/test/test-brain-neat.ts  |  21 +++-
 3 files changed, 185 insertions(+), 128 deletions(-)

diff --git a/src/site/snake/brain-neat.ts b/src/site/snake/brain-neat.ts
index bd0343b..fd32d0f 100644
--- a/src/site/snake/brain-neat.ts
+++ b/src/site/snake/brain-neat.ts
@@ -95,6 +95,7 @@
  * E = # of Excess Genes
  * D = # of Disjoint Genes
  * W = Average Weight Difference of Matching Genes
+ * N = # of Genes in Larger Genome
  * c1, c2, c3 = Provided coefficients to adjust importance of each factor
  *
  * Explicit Fitness Sharing:
@@ -107,38 +108,70 @@
  * δ_t = Adjustable Compatibility Distance Threshold
  * δ(i, j) = Compatibility Distance between i and j
  * sh(δ(i, j)) = Fitness Sharing Function - 1 if δ(i, j) < δ_t else 0
- *               The sum of this function counts the number of organisms in i's species
+ *               The sum of this function is equal to the number of organisms in i's species
  *
+ * --- Iteration ---------------------------------------------------------------
  *
- * TODO: species bucketing -- species from previous generations are considered in the same species forever
- *
+ * 1. Create the initial genome as a "complete bipartite" graph between input and output nodes
+ * 2. Generate an initial population using the initial genome
+ *    - use random values for the connection weights
+ *    - all connection genes should be marked "enabled"
+ *    - all organisms in the initial population will be part of species #1
+ *        - this implies that randomized connection weights should be chosen such that c3*W < δ_t for all organisms
+ * 3. Training Loop
+ *    a) Compute fitness f_i for all organisms[i]
+ *    b) Find adjusted f_adj_i based on each organism's species
+ *    c) Mating
+ *       - The next generation's organisms are computed as a function of the previous generation's organisms
+ *       - The next generation will have the same population as the previous generation
+ *       - Filtering:
+ *           - Organisms that do not meet the Survival Threshold are not allowed to mate
+ *           - Species that have not improved in fitness for stag_lim generations are not allowed to mate
+ *       - Selection:
+ *           - The "champion" (highest fitness) of each species with at least champ_sp networks is cloned into the next generation
+ *           - while more population is required:
+ *               - for each organism that meets the survival threshold (mom):
+ *                     - select a mate (dad):
+ *                         - r_asex chance:        dad is the same organism as mom (no crossover)
+ *                         - else r_int_sp chance: dad is a random organism from a different species
+ *                         - else:                 dad is a random other organism from mom's species
+ *                     - compute new genome (baby) for mom x dad
+ *                         - dis_odds chance for a gene disabled in either parent to become enabled in the child
+ *                     - conditionally mutate baby's genome:
+ *                         - mut_odds chance to mutate each existing connection weight
+ *                             - if mutated, mut_w_p chance to "uniformly perturb" -> x' = x + K * rand(-1,1)
+ *                                           mut_w_r chance to assign random value -> x' = J * rand(-1,1)
+ *                         - mut_nn chance to add a new node splitting a connection src->dst: weight(src->new) = 1, weight(new->dst) = weight(src->dst)
+ *                         - mut_nc chance to add a new connection of random weight
+ *               - TODO: figure out how future speciation works... does it just keep species based on the champions?
  *
  * --- Parameters --------------------------------------------------------------
  *
  * To learn XOR and DPV (Double-pole balancing, with velocities), Stanley and Miikulainen used the following parameters
- * +-------+-------
- * | paper | desc
- * +-------+-------
- * |   150 | Simulation Population
- * +-------+-------
- * |   3.0 | δ_t - Compatibility Distance Threshold
- * |   1.0 | c1  - CDF Excess genes coefficient
- * |   1.0 | c2  - CDF Disjoint genes coefficient
- * |   0.4 | c3  - CDF Average matching weight coefficient
- * +-------+-------
- * |   80% | Chance for a child to have its connection weights mutated
- * |   90% | Chance for, given a child weights mutation, a weight to be "uniformly perturbed" (x' = x + K * rand())
- * |   10% | Chance for, given a child weights mutation, a weight to be assigned a random value
- * |   25% | Chance for a gene disabled in at least one parent to become enabled in the child
- * +-------+-------
- * |    15 | After this many generations of stagnant fitness growth, a species is barred from reproduction (considered "found its peak")
- * +-------+-------
- * |   25% | Non-crossover offspring (mutation only (and guaranteed))
- * | 0.001 | Interspecies mating rate (chance for a mating event to involve different species)
- * +-------+-------
- * |    3% | Probability of adding a new node
- * |    5% | Probability of adding a new connection
- * +-------+-------
+ * +-------+----------+----
+ * | paper | variable | desc
+ * +-------+----------+----
+ * |   150 | pop      | Simulation Population
+ * +-------+----------+----
+ * |   3.0 | δ_t      | Compatibility Distance Threshold
+ * |   1.0 | c1       | CDF Excess genes coefficient
+ * |   1.0 | c2       | CDF Disjoint genes coefficient
+ * |   0.4 | c3       | CDF Average matching weight coefficient
+ * +-------+----------+----
+ * |   80% | mut_odds | Chance for a child to have its connection weights mutated
+ * |   90% | mut_w_p  | Chance for, given a child weights mutation, a weight to be "uniformly perturbed" (x' = x + K * rand())
+ * |   10% | mut_w_r  | Chance for, given a child weights mutation, a weight to be assigned a random value
+ * |   25% | dis_odds | Chance for a gene disabled in at least one parent to become enabled in the child
+ * +-------+----------+----
+ * |     5 | champ_sp | Species with at least this many networks will have their champions cloned into the next generation
+ * |    15 | stag_lim | After this many generations of stagnant fitness growth, a species is barred from reproduction (considered "found its peak")
+ * +-------+----------+----
+ * |   25% | r_asex   | Non-crossover offspring (mutation only (and guaranteed))
+ * | 0.001 | r_int_sp | Interspecies mating rate (chance for a mating event to involve different species)
+ * +-------+----------+----
+ * |    3% | mut_nn   | Probability of adding a new node
+ * |    5% | mut_nc   | Probability of adding a new connection
+ * +-------+----------+----
  *
  * All nodes used a "slightly steepened" sigmoidal transfer function "for more fine tuning at extreme activations"
  * "It is optimized to be close to linear duing its steepest ascent between activations, -0.5 and 0.5"
@@ -159,6 +192,16 @@
  * |   30% | Probability of adding a new connection
  * +-------+-------
  *
+ * --- Additional Parameters ---------------------------------------------------
+ *
+ * While not described specifically in the paper, these parameters are still important to the simulation
+ *
+ * +-------+-------
+ * | value | desc
+ * +-------+-------
+ * |   20% | Survival Threshold (an organism's adjusted fitness must be in the top x% to be allowed to mate)
+ * +-------+-------
+ *
  * --- TODO --------------------------------------------------------------------
  *
  * - Determine reproduction algo
@@ -173,8 +216,7 @@
  *   - Effectively pass data from inputs, through hidden nodes, to outputs
  */
 
-import IOSet from "./ioset";
-import { Network, Node, NodeID } from "./network";
+import { Network, Node, NodeID } from './network';
 
 interface Gene {
     innovation: number;
@@ -188,15 +230,16 @@ type GeneNode = Gene & { src: Node<GeneNode>; dst: Node<GeneNode> };
 type Genome = Gene[];
 
 interface GenomeAlignment {
-	matching: { a: Gene, b: Gene }[],
-	disjoint: { a: Gene | null, b: Gene | null }[],
-	excess: { a: Gene | null, b: Gene | null }[],
+    matching: { a: Gene; b: Gene }[];
+    disjoint: { a: Gene | null; b: Gene | null }[];
+    excess: { a: Gene | null; b: Gene | null }[];
+    genomes: { a: Genome; b: Genome };
 }
 
 interface CompatibilityDistanceConfig {
-	c1: number;
-	c2: number;
-	c3: number;
+    c1: number;
+    c2: number;
+    c3: number;
 }
 
 type CompatibilityDistanceThreshold = number;
@@ -204,62 +247,60 @@ type CompatibilityDistanceThreshold = number;
 type SpeciesID = number;
 
 export function alignGenome(a: Gene[], b: Gene[]): GenomeAlignment {
-	// genes by innovation number
-	const innovations = new Set<number>();
-	const genesA = new Map<number, Gene>();
-	const genesB = new Map<number, Gene>();
-	for (const gene of a) {
-		innovations.add(gene.innovation);
-		genesA.set(gene.innovation, gene);
-	}
-	for (const gene of b) {
-		innovations.add(gene.innovation);
-		genesB.set(gene.innovation, gene);
-	}
+    // genes by innovation number
+    const innovations = new Set<number>();
+    const genesA = new Map<number, Gene>();
+    const genesB = new Map<number, Gene>();
+    for (const gene of a) {
+        innovations.add(gene.innovation);
+        genesA.set(gene.innovation, gene);
+    }
+    for (const gene of b) {
+        innovations.add(gene.innovation);
+        genesB.set(gene.innovation, gene);
+    }
 
-	const aMaxInnov = a[a.length - 1]!.innovation;
-	const bMaxInnov = b[b.length - 1]!.innovation;
-	const excessStart = Math.min(aMaxInnov, bMaxInnov);
+    const aMaxInnov = a[a.length - 1]!.innovation;
+    const bMaxInnov = b[b.length - 1]!.innovation;
+    const excessStart = Math.min(aMaxInnov, bMaxInnov);
 
-	const alignment: GenomeAlignment = { matching: [], disjoint: [], excess: [] };
+    const alignment: GenomeAlignment = { matching: [], disjoint: [], excess: [], genomes: { a, b } };
 
-	let isDisjoint = false;
-	const innovationsOrder = Array.from(innovations).sort((a, b) => a - b);
-	for (const innovation of innovationsOrder) {
-		const geneA = genesA.get(innovation) ?? null;
-		const geneB = genesB.get(innovation) ?? null;
+    let isDisjoint = false;
+    const innovationsOrder = Array.from(innovations).sort((a, b) => a - b);
+    for (const innovation of innovationsOrder) {
+        const geneA = genesA.get(innovation) ?? null;
+        const geneB = genesB.get(innovation) ?? null;
 
-		if (geneA === null || geneB === null) isDisjoint = true;
+        if (geneA === null || geneB === null) isDisjoint = true;
 
-		const pair = { a: geneA, b: geneB }
-		if (innovation > excessStart) alignment.excess.push(pair);
-		else if (isDisjoint)          alignment.disjoint.push(pair);
-		else                          alignment.matching.push(pair as { a: Gene, b: Gene });
-	}
+        const pair = { a: geneA, b: geneB };
+        if (innovation > excessStart) alignment.excess.push(pair);
+        else if (isDisjoint) alignment.disjoint.push(pair);
+        else alignment.matching.push(pair as { a: Gene; b: Gene });
+    }
 
-	return alignment;
+    return alignment;
 }
 
 export function compatibilityDistance(alignment: GenomeAlignment, { c1, c2, c3 }: CompatibilityDistanceConfig) {
-	const totalLength = alignment.excess.length + alignment.disjoint.length + alignment.matching.length;
-	const avgWeightDiff = alignment.matching
-		.map(({ a, b }) => Math.abs(a.weight - b.weight))
-		.reduce((p, c) => p + c) / alignment.matching.length;
-	const distance = (
-		c1 * alignment.excess.length / totalLength
-		+ c2 * alignment.disjoint.length / totalLength
-		+ c3 * avgWeightDiff
-	);
-	return distance;
+    const maxLength = Math.max(alignment.genomes.a.length, alignment.genomes.b.length);
+    const avgWeightDiff =
+        alignment.matching.map(({ a, b }) => Math.abs(a.weight - b.weight)).reduce((p, c) => p + c) /
+        alignment.matching.length;
+    const distance =
+        (c1 * alignment.excess.length) / maxLength + (c2 * alignment.disjoint.length) / maxLength + c3 * avgWeightDiff;
+    return distance;
 }
 
 // TODO: update this to support "given previous generation's species, what species (or a new species) should this genome be in"
 export function speciate(
-	genomes: Genome[],
-	cdc: CompatibilityDistanceConfig,
-	cdt: CompatibilityDistanceThreshold,
+    species: Map<SpeciesID, Genome[]>,
+    newGenomes: Genome[],
+    cdc: CompatibilityDistanceConfig,
+    cdt: CompatibilityDistanceThreshold,
 ) {
-	const species = new Map<SpeciesID, Genome[]>();
+    // const species = new Map<SpeciesID, Genome[]>();
 }
 
 function activate(n: number) {
diff --git a/src/site/snake/ioset.ts b/src/site/snake/ioset.ts
index ab87e5a..5ef9baf 100644
--- a/src/site/snake/ioset.ts
+++ b/src/site/snake/ioset.ts
@@ -1,62 +1,63 @@
+// TODO: remove this, Set is already insertion-ordered in javascript!
 /** an insertion ordered set */
 export default class IOSet<T> {
-	// NOTE: this data structure could be improved to have O(1)
-	//       deletion time if `list` is swapped out with a linked
-	//       list instead of an array.
-	map: Map<T, number>;
-	list: T[];
+    // NOTE: this data structure could be improved to have O(1)
+    //       deletion time if `list` is swapped out with a linked
+    //       list instead of an array.
+    map: Map<T, number>;
+    list: T[];
 
-	constructor(values?: Iterable<T>) {
-		this.map = new Map();
-		this.list = [];
+    constructor(values?: Iterable<T>) {
+        this.map = new Map();
+        this.list = [];
 
-		if (values !== undefined) this.extend(values)
-	}
+        if (values !== undefined) this.extend(values);
+    }
 
-	has(v: T) {
-		return this.map.has(v);
-	}
+    has(v: T) {
+        return this.map.has(v);
+    }
 
-	add(v: T) {
-		if (!this.map.has(v)) {
-			this.map.set(v, this.list.length);
-			this.list.push(v);
-		}
-	}
+    add(v: T) {
+        if (!this.map.has(v)) {
+            this.map.set(v, this.list.length);
+            this.list.push(v);
+        }
+    }
 
-	delete(v: T) {
-		const idx = this.map.get(v);
-		if (idx === undefined) return false;
-		this.map.delete(v);
-		this.list.splice(idx, 1);
-		return true;
-	}
+    delete(v: T) {
+        const idx = this.map.get(v);
+        if (idx === undefined) return false;
+        this.map.delete(v);
+        this.list.splice(idx, 1);
+        return true;
+    }
 
-	extend(values: Iterable<T>) {
-		for (const v of values) {
-			this.add(v);
-		}
-	}
+    extend(values: Iterable<T>) {
+        for (const v of values) {
+            this.add(v);
+        }
+    }
 
-	shift() {
-		const v = this.list[0]!;
-		// NOTE: performance could be boosted since no need for this.map.get(v) and if
-		this.delete(v);
-		return v;
-	}
+    shift() {
+        const v = this.list[0]!;
+        // NOTE: performance could be boosted since no need for this.map.get(v) and if
+        this.delete(v);
+        return v;
+    }
 
-	pop() {
-		const v = this.list[this.list.length - 1]!;
-		// NOTE: performance could be boosted since no need for this.map.get(v) and if
-		this.delete(v);
-		return v;
-	}
+    pop() {
+        const v = this.list[this.list.length - 1]!;
+        // NOTE: performance could be boosted since no need for this.map.get(v) and if
+        this.delete(v);
+        return v;
+    }
 
-	get size() {
-		return this.list.length;
-	}
+    get size() {
+        return this.list.length;
+    }
 
-	[Symbol.iterator]() {
-		return this.list[Symbol.iterator]();
-	}
+    [Symbol.iterator]() {
+        return this.list[Symbol.iterator]();
+    }
 }
diff --git a/src/test/test-brain-neat.ts b/src/test/test-brain-neat.ts
index cecd5ff..79fd918 100644
--- a/src/test/test-brain-neat.ts
+++ b/src/test/test-brain-neat.ts
@@ -77,17 +77,32 @@ function testCompatibilityDistance() {
 
     const alignment = alignGenome(genomeA, genomeB);
 
+    /*
+     * compatibility distance function (CDF):
+     *
+     *      c1*E + c2*D
+     * δ = ------------- + c3*W
+     *           N
+     *
+     * δ = Compatibility Distance
+     * E = # of Excess Genes
+     * D = # of Disjoint Genes
+     * W = Average Weight Difference of Matching Genes
+     * N = # of Genes in Larger Genome
+     * c1, c2, c3 = Provided coefficients to adjust importance of each factor
+     */
+
     const dist1 = compatibilityDistance(alignment, { c1: 1, c2: 0, c3: 0 });
     const dist2 = compatibilityDistance(alignment, { c1: 0, c2: 1, c3: 0 });
     const dist3 = compatibilityDistance(alignment, { c1: 0, c2: 0, c3: 1 });
-    assert(dist1 === 2 / 10);
-    assert(dist2 === 3 / 10);
+    assert(dist1 === 2 / 9);
+    assert(dist2 === 3 / 9);
     // |8 - 4| + |1 - 0| + |2 - 3| + |9 - 9| + |3 - 5|
     // 4 + 1 + 1 + 0 + 2
     // 8 / 5
     assert(dist3 === 8 / 5);
 
     const distCombo = compatibilityDistance(alignment, { c1: 2, c2: 3, c3: 4 });
-    assert(distCombo === 2 * (2 / 10) + 3 * (3 / 10) + 4 * (8 / 5));
+    assert(distCombo === 2 * (2 / 9) + 3 * (3 / 9) + 4 * (8 / 5));
 }
 addTest(testCompatibilityDistance);