Commit everything

author: SIPB 2024-12-08 17:12:01 -0500
committer: SIPB 2024-12-08 17:12:01 -0500
commit: 6ae42b74e177f31b6dbcd06a1ae29be34deac8bb (patch)
tree: afac4daf9b51dc992f2152b9f792108a8d6714f8
parent: f54a040ecea0e2273e6bd06874ca4c834b4b8caf (diff)
5 files changed, 1231 insertions, 3663 deletions
diff --git a/blog.bib b/blog.bib
deleted file mode 100644
index b118a5a..0000000
--- a/blog.bib
+++ /dev/null
@@ -1,51 +0,0 @@
-@inproceedings{10.5555/3666122.3666260,
-author = {Zang, Xiao and Yin, Miao and Xiao, Jinqi and Zonouz, Saman and Yuan, Bo},
-title = {GraphMP: graph neural network-based motion planning with efficient graph search},
-year = {2024},
-publisher = {Curran Associates Inc.},
-address = {Red Hook, NY, USA},
-abstract = {Motion planning, which aims to find a high-quality collision-free path in the configuration space, is a fundamental task in robotic systems. Recently, learning-based motion planners, especially the graph neural network-powered, have shown promising planning performance. However, though the state-of-the-art GNN planner can efficiently extract and learn graph information, its inherent mechanism is not well suited for graph search process, hindering its further performance improvement. To address this challenge and fully unleash the potential of GNN in motion planning, this paper proposes GraphMP, a neural motion planner for both low and high-dimensional planning tasks. With the customized model architecture and training mechanism design, GraphMP can simultaneously perform efficient graph pattern extraction and graph search processing, leading to strong planning performance. Experiments on a variety of environments, ranging from 2D Maze to 14D dual KUKA robotic arm, show that our proposed GraphMP achieves significant improvement on path quality and planning speed over state-of-the-art learning-based and classical planners; while preserving competitive success rate.},
-booktitle = {Proceedings of the 37th International Conference on Neural Information Processing Systems},
-articleno = {138},
-numpages = {12},
-location = {New Orleans, LA, USA},
-series = {NIPS '23}
-}
-
-@article{DBLP:journals/corr/abs-2102-09544,
-  author       = {Quentin Cappart and
-                  Didier Ch{\'{e}}telat and
-                  Elias B. Khalil and
-                  Andrea Lodi and
-                  Christopher Morris and
-                  Petar Velickovic},
-  title        = {Combinatorial optimization and reasoning with graph neural networks},
-  journal      = {CoRR},
-  volume       = {abs/2102.09544},
-  year         = {2021},
-  url          = {https://arxiv.org/abs/2102.09544},
-  eprinttype    = {arXiv},
-  eprint       = {2102.09544},
-  timestamp    = {Fri, 26 Feb 2021 14:31:25 +0100},
-  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-09544.bib},
-  bibsource    = {dblp computer science bibliography, https://dblp.org}
-}
-
-@article{10.1109/TPAMI.2023.3256421,
-author = {Tutsoy, Onder},
-title = {Graph Theory Based Large-Scale Machine Learning With Multi-Dimensional Constrained Optimization Approaches for Exact Epidemiological Modeling of Pandemic Diseases},
-year = {2023},
-issue_date = {Aug. 2023},
-publisher = {IEEE Computer Society},
-address = {USA},
-volume = {45},
-number = {8},
-issn = {0162-8828},
-url = {https://doi.org/10.1109/TPAMI.2023.3256421},
-doi = {10.1109/TPAMI.2023.3256421},
-abstract = {Multi-dimensional prediction models of the pandemic diseases should be constructed in a way to reflect their peculiar epidemiological characters. In this paper, a graph theory-based constrained multi-dimensional (CM) mathematical and meta-heuristic algorithms (MA) are formed to learn the unknown parameters of a large-scale epidemiological model. The specified parameter signs and the coupling parameters of the sub-models constitute the constraints of the optimization problem. In addition, magnitude constraints on the unknown parameters are imposed to proportionally weight the input-output data importance. To learn these parameters, a gradient-based CM recursive least square (CM-RLS) algorithm, and three search-based MAs; namely, the CM particle swarm optimization (CM-PSO), the CM success history-based adaptive differential evolution (CM-SHADE), and the CM-SHADEWO enriched with the whale optimization (WO) algorithms are constructed. The traditional SHADE algorithm was the winner of the 2018 IEEE congress on evolutionary computation (CEC) and its versions in this paper are modified to create more certain parameter search spaces. The results obtained under the equal conditions show that the mathematical optimization algorithm CM-RLS outperforms the MA algorithms, which is expected since it uses the available gradient information. However, the search-based CM-SHADEWO algorithm is able to capture the dominant character of the CM optimization solution and produce satisfactory estimates in the presence of the hard constraints, uncertainties and lack of gradient information.},
-journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
-month = aug,
-pages = {9836–9845},
-numpages = {10}
-}
-\ No newline at end of file
diff --git a/blog.md b/blog.md
index 10333f9..c86b32b 100644
--- a/blog.md
+++ b/blog.md
@@ -1,11 +1,28 @@
 ---
-build: pandoc blog.md --citeproc --katex -s -o index.html
-mkzip: zip project.zip index.html *.png
-title: "Discovering Graph Algorithms Using Transformers"
-bibliography: blog.bib
-link-citations: true
+build: pandoc blog.md --katex -s --filter pandoc-include -o index.html
+mkzip: zip project.zip index.html img
+title: "Off-Distribution Generalization of Transformers for Shortest Paths"
 ---
 
+<style>
+body {
+    /* Shrink margins */
+    max-width: 45em;
+    /* font-size: 110%; */
+}
+
+/* Make sure toolbar plots aren't cut off */
+.mpld3-figure {
+    height: 480px;
+    display: block;
+    margin: auto;
+}
+</style>
+
+<!--
+BTW we should run a spell checker on this at some point before submitting 
+note to self
+-->
 <!-- Guidelines: https://www.dropbox.com/scl/fi/bet8enscln8ue36kd8t17/final_project_guidelines.pdf?rlkey=knd19cnumk51ho1y9crno56ib&e=2&dl=0 -->
 
 <div style="text-align:center">
@@ -14,31 +31,26 @@ Anthony Wang, Alek Westover, Kevin Zhao
 {xy,alekw,kevinmz}\@mit.edu
 </div>
 
-## Motivation
+## Introduction
 
-Transformers--the architecture that powers LLMs--can do incredible feats: trained on hundreds of gigabytes of raw text, they can learn to hold natural conversations, reason about the physical world, and write code. Skeptics argue that LLMs are simply memorizing their datasets without gaining any deeper understanding. For instance, GPT's o1 model, achieving 90th percentile on Codeforces, struggles with simple but bizarre algorithms problems such as "find the subarray of a 2D array with the minimum average". In this project, we hope to explore **when off-distribution generalization happens in a transformer**. Paul Christiano proposed an experiment [here](https://www.alignmentforum.org/posts/BxersHYN2qcFoonwg/experimentally-evaluating-whether-honesty-generalizes?commentId=dsDA2BWpHPdgLvaXX) about shortest paths in a graph to investigate this, so we decided to become the first to implement his experiment and put transformers' generalization abilities to the test.
+### Motivation
 
-LLMs are notorious for making up complete nonsense, so we also hope that our project can shed light on when truthfulness generalizes. It's generally desirable for LLMs to output true statements. One current approach for ensuring this is to have a human in the loop rewarding the model for true outputs (e.g. RLHF). However, humans can be poor judges of truthfulness and have many cognitive biases and superficial heuristics. A further challenge is that as LLMs become more capable, there might not even exist experts that are good judges of whether the models outputs, such as difficult mathematical proofs, are truthful. For instance, most Task Rabbit workers would probably be hard pressed to evaluate whether a difficult mathematical proof produced by an LLM is true. The entire mathematical community has been known on occasion to [believe false statements for many years](https://en.wikipedia.org/wiki/Grunwald%E2%80%93Wang_theorem).
+Neural networks are capable of impressive feats of off-distribution generalization. For instance, a program trained to convert sketches of cats into realistic-looking pictures of cats can draw a cat with three eyes, if the sketch has three eyes, even if such an image never occurred in its training data. However, neural networks also often learn non-robust features that cause them to perform very poorly off-distribution (e.g., adversarial examples for an image classifier). In this project, we will investigate the question of when transformers generalize off-distribution via a case study on a simple synthetic task. More specifically, the goal of our project is to make progress towards answering the following question:
 
-One possible solution is to reward an LLM for truthful behavior on simple inputs, and then hope that the LLM generalizes its truthful behavior for more complex inputs where humans cannot provide helpful labels. Deep learning models can be remarkably good at off-distribution generalization--for instance, a model trained to transform hand drawn cats into images of cats might be able to handle a "cat" with three eyes in an intuitive way. We might hope that generalizing truthfully is simple, thus promoted by "Occam's Razor".
+> Suppose you train a model to be truthful on task A, and to produce coherent answers on a related task B.\
+> Will the model learn to be truthful on task B?
 
-## Related Work
+Here's a more specific version of this question, 
+inspired by a [blog post written by Paul Christiano](https://www.alignmentforum.org/posts/BxersHYN2qcFoonwg/experimentally-evaluating-whether-honesty-generalizes?commentId=dsDA2BWpHPdgLvaXX) 
+(who was influential in developing RLHF).
 
-COMMENT FROM ALEK 
--- please remove all mentions of graph neural networks -- that is BS: there is no actual reason why you'd ever use a Neural network to solve shortest paths, the point of choosing a synthetic task is because there is a **simple ground truth** which makes it easy to evaluate whether or not our model is performing correctly. We'd also hoped that the simplicity of the task would make it more feasible to do with a limited compute budget, but apparently this task was too hard for our architecture.
+> Suppose we trained a language model on next token prediction in both English and French, and then fine-tuned the model to be truthful in English. Would the model then learn to answer questions truthfully in French? It has been shown [^1] that this doesn't happen by default, but you could hope that the truthfulness training in English would generalize to French.
 
+In the future, humans are likely to trust AIs with performing important and complex tasks. For some of these tasks it will be infeasible for humans to audit the answers -- either because human labor is too expensive, or because the AIs are too competent, so humans aren't skilled enough to perform an audit (for example, an amateur chess player might have a hard time telling apart a good chess move and a bad chess move). However, there will be some simpler tasks where we can audit the AIs and reward truthful answers. So, it's plausible that models trained to be truthful in some simple domains will learn truthfulness as a general lesson.
 
-There has been some research into the algorithmic optimization of GNNs and how they may solve real-world issues; however, none of the related work targets using generic machine learning methods to solve graph problems.
+### Task
 
-- Cappart et al. has researched more into the Combinatorial Optimization of GNNs and developed algorithms for related tasks, thus facilitating machine learning [@DBLP:journals/corr/abs-2102-09544]. Their results are mostly algorithmic so we develop further by trading a bit of accuracy for much faster computation in such tasks.
-
-- Tutsoy uses a graph-theory-based approach to model the epidemiological characteristics of infectious diseases, such as COVID-19 [@10.1109/TPAMI.2023.3256421]. We understand from his paper how GNN optimization may also be useful in researching novel diseases.
-
-
-
-## Task
-
-Our synthetic task is simple: compute the distance between various vertices in an input graph. To test off-distribution generalization, our experiment has three steps.
+In order to investigate generalization of this type in transformers, we decided to focus on a simple synthetic task -- shortest paths in a graph. More specifically, we ran the following experiment (see [Data](#data) for more details):
 
 1. **Pre-train** a transformer to predict the distance between vertices $1$ and $2$ in graphs with $n \in [3,15]$ vertices.
 
@@ -46,23 +58,130 @@ Our synthetic task is simple: compute the distance between various vertices in a
 ![](img/train.svg)
 </div>
 
-2. **Fine-tune** a transformer to predict the distances between vertex $1$ to $t$ for any $t$ on the shortest path from $1$ to $2$, but only do fine-tuning on graphs with $n \in [3,7]$ vertices.
+2. **Fine-tune** a transformer to predict the distance from vertex $1$ to $t$ for any $t$ on the shortest path from $1$ to $2$, but only do fine-tuning on small graphs with $n \in [3,7]$ vertices. Our intuition was that, in training a model to find distances between vertices $1$ and $2$, the model would also implicitly need to learn about distances from vertex $1$ to other vertices on this shortest path.
 
 <div style="text-align:center">
 ![](img/finetune.svg)
 </div>
 
-3. **Test** whether the transformer can accurately predict the distances between $1$ to $t$ for any $t \leq 7$ on the shortest path from $1$ to $2$ for graphs with $n \in [3,15]$ vertices.
+3. **Test** whether the transformer can accurately generalize to predicting the distance from $1$ to $t$ for any $t \in [3,15]$ on the shortest path from $1$ to $2$ for graphs with $n \in [3,15]$ vertices.
 
 <div style="text-align:center">
 ![](img/test.svg)
 </div>
 
-### Algorithm for Shortest Paths
+<!-- ![Experiment description](traintunegeneralize.png) -->
+
+Obviously, if you actually want to compute shortest paths in a graph, you should use the standard graph algorithm BFS (breadth-first search). Our motivation for using this problem to study generalization was two-fold:
+
+1. It's very simple to generate data and ground truth labels for this task.
+2. There is a simple circuit which computes shortest paths (exactly), by doing something akin to BFS. In fact, we implemented this simple circuit by hand in the section [Handcrafted BFS Transformer](#handcrafted-bfs-transformer). Our hope was that our model could also find a simple circuit that generalizes well.
+
+
+<!--
+ANT:  
+RE t<7
+The model couuld've decided to generalize for t > 7 too, but it didn't  
+so id say 
+we tested generalization for all t in 3, 15
+it succeeded for t in 3,7
+failed for t in 8 to 15
+reason is because it could do "if statements" instead of realizing that it should just be looking at the dor product
+mmm
+i feel like the fact that it works for 3,7 and not 8,15 is kind of compelling
+anyways this  is just a comment explaining how im going to / think you should pitch the generalization section
+(it couldve generalized all the way, but didnt, so what we learened is generalization sometimes happens but sometimes doesnt
+
+yeah that's the intuition
+can't really do interpretability to figure out if that's actually what's going on
+-->
+
+### Findings
+
+Our main findings were as follows:
+
+1. There exists a simple set of weights that you can find by hand to achieve arbitrarily small MSE loss on shortest paths. Moreover, this set of weights is "robust" in the sense that, if you perturb some of these weights and run gradient descent, the model will recover low loss.
+
+2. Transformers can learn on their own to perform shortest paths. The training process consists of a sequence of **phase changes** where the loss would barely change for a large number of epochs, and then over a very small number of epochs, the loss would dramatically go down. (TODO: Is this just because we're bad at DL? now it shouldn't have as many phase changes. Or maybe just in the beginning?) In particular, these phase changes corresponded to times when the model decided to, e.g., go from dramatically over-classifying paths as being length 1 (resulting in great performance on tasks of length 1 but poor performance on other tasks) to spreading these length 1 guesses out to other distances. 
+
+3. A model trained to compute the distance between two specific vertices can be fine-tuned efficiently to compute the shortest distance to other vertices.
+
+4. A model trained to compute the distance between vertices $1$ and $2$ on large graphs, and fine-tuned to compute the distance between vertex $1$ and any vertex (on a shortest $1$-$2$ path) in small graphs, has the following generalization properties:
+   - In larger graphs, the model can still compute the shortest distance between $1$ and any $t$ whose label existed in the smaller graphs, namely $t \in [3,7]$.
+   - However, the model fails when the target vertex doesn't exist in smaller graphs.
+
+The main conceptual implication of this work is that, while off-distribution generalization sometimes does occur, sometimes it also doesn't, and it seems tricky to predict when off-distribution generalization will happen.
+
+### Post Outline
+
+The remainder of our post is structured as follows:
 
-The standard algorithm to find the shortest path in a graph between vertices $u$ and $v$ is **breadth-first search (BFS)**, taught in every intro algorithms class. Initially, BFS starts at $u$, and at each phase, explores a farther layer of vertices from $u$. During a phase, BFS goes through every vertex in the current layer and adds any of their unvisited neighbors to the next layer. The algorithm terminates once we reach $v$ or if the next layer is empty. For a graph with $V$ vertices and $E$ edges, the runtime of BFS is $O(V + E)$. BFS gives us an easy and fast way to find the ground truth answer for any graph, so that we can verify the accuracy of our machine learning approach.
+1. [Handcrafted BFS Transformer](#handcrafted-bfs-transformer): To start, we explicitly construct by hand weights for a transformer to solve the shortest paths problem. We also investigate the "robustness" of our construction by showing that, after perturbing the weights of our constructed solution, gradient descent is able to recover low loss. This gives a proof-of-concept that a transformer could encode a simple generalizable solution to the problem, and gives some reason to believe that a transformer could learn such a solution.
+
+2. [Training](#training): Next, we discuss the training. First we describe the exact data generation process and model architecture. Then we discuss hyperparameter settings and optimization choices that were important to get the model to train efficiently. Finally, we investigate some interesting phenomena that occurred during training, such as phase transitions in loss for different path lengths.
+
+3. [Generalization](#generalization): Finally we discuss the phenomenon that we wanted to study -- generalization! We show that some generalization does occur, but other generalization doesn't. We explain why we think this happens.
+
+## Handcrafted BFS Transformer
+
+In this section we describe an explicit set of weights, made (with love) by hand, that result in a transformer that achieves MSE less than $10^{-6}$ for the shortest paths problem. We'll start by describing the (somewhat involved) constructions, and then investigate the geometry of the loss landscape near our explicit solution. The main reason for doing this is to demonstrate the existence of a simple transformer computing shortest paths, to give proof-of-concept that the transformer could learn a generalizable solution. 
+
+The construction is somewhat tricky to write down, so we'll make several (non-essential) simplifying assumptions:
+
+1. We only care about finding the distance between vertex 1 and vertex 2.
+2. We are given a nice embedding of the graph.
+
+It will be obvious from the construction how to get rid of assumptions (1) and (2). 
+
+The embedding of the graph that we will work with here will be different from the embeddings that we use in the rest of the post. In particular, in this section only, the tokens will correspond to vertices. In the remainder of the post, tokens will correspond to edges. 
+
+More specifically, we will create the following embeddings for our input sequence:
+
+|                 | Answer token | $v_{1}$ token | $\ldots$ | $v_n$ token |
+| --------------- | ------------ | ------------- | -------- | ----------- |
+| Answer flag     | 1            | 0             |          | 0           |
+| Neighbors       |              | 01101         |          | 10010       |
+| Reach           |              | 01101         |          | 10010       |
+| Out             | 00000        |               |          |             |
+| Self            |              | 10000         |          | 00001       |
+| Not-answer flag | 0            | 1             |          | 1           |
+
+As you can see, the hidden dimension of the model will be $4n+2$. The hidden dimension is broken up into 5 sections, some of which are only used in the answer token, and some of which are only used in the vertex tokens. Here are what the different parts of the hidden dimension represent:
+
+1. Flags: booleans indicating whether a vertex is the answer token or not. If desired, you can think of this as part of a "positional embedding". The flags will not be updated as we move through the transformer.
+2. Neighbors: a boolean vector (of dimension $n$) describing the neighbors of each vertex. (We just put zeros here for the answer token). This also won't update through the course of running the transformer.
+3. Reach: a boolean vector representing the vertices that each vertex can reach. This will update over the course of a forward pass of the transformer. Specifically, at layer $i$ of the transformer we will extend the reach of each vertex from vertices which are distance $i+1$ away, to vertices at distance $i+2$ away.
+4. Out: this starts as zero for all tokens. Once vertex $1$ can reach vertex $2$ (i.e., we are in a layer deep enough that the reach vector for $v_{1}$'s token indicates that it can reach vertex $2$) we'll start writing $1$ on the answer token's out part of its embedding.
+5. Self: a 1-hot encoding specifying which vertex each token corresponds to. You should think of this as a positional embedding (along with the flags).
+
+The transformer will consist of $n$ layers, each of which has $n+1$ heads. We've included the code for the handcoded transformer in the [Appendix](#appendix) -- possibly it's helpful to look at the code in conjunction with the written explanation below.
+
+In layer $i$, our goal is to extend the reach of each vertex $v$ from the set of vertices at distance $i+1$ from $v$ to the set of vertices at distance $i+2$ from $v$. 
+The $j$-th head will perform the following operation: 
+
+```
+In head j at layer i:
+Vertex v's query vector will have high dot-product with vertex w's key vector
+iff vertex v can reach vertex j, and vertex w is neighbors with vertex j 
+
+The value that vertex w would give to vertex v is simply a 1-hot vector, with a 1 in the w-th position
+```
 
-We hope that our model can learn BFS or some other simple, generalizable algorithm for shortest paths, because the model can't just pull some magic number out of a hat but intuitively needs to count or list the vertices on the shortest path from $1$ to $2$. In fact, we will show how to hand-craft a set of weights to implement BFS in a transformer, so it's indeed theoretically possible for a transformer to achieve 100% accuracy.
+After running the heads, each vertex's embedding is updated to integrate the new vertices that it can reach into its embedding. Note that we can think of embeddings as being updated because transformers have a residual stream of embeddings that we make modifications to. 
+
+Finally, the last head will be in charge of noticing whether vertex 1 has reached vertex 2 yet, and if so recording this fact. For the full details, see [appendix](#appendix).
+
+### Robustness
+
+The field of Singular Learning Theory (SLT; see Liam Carroll's Master's thesis "Phase Transitions in Neural Networks" for an introduction) aims to understand model training and loss-landscape geometry. To better understand the loss landscape of the shortest paths problem under the token embedding used in our handcrafted transformer, we started at a good setting of the parameters, perturbed the weights, and checked whether the model could subsequently recover low loss. The intuition for why this is a good way to measure "how attractive a loss basin" we have is that this experiment is similar in spirit to the Local Learning Coefficient from SLT (see Lau, Edmund, Zach Furman, George Wang, Daniel Murfet, and Susan Wei. "The Local Learning Coefficient: A Singularity-Aware Complexity Measure"). We found that perturbing the weights led to high loss, but gradient descent was able to recover low loss, indicating that the solution is somewhat "findable" by gradient descent.
+
+
+TODO: Switch to fancy mpld3 plots
+![perturb.png](perturb.png)
+![perturb-loss.png](perturb-loss.png)
+
+
+## Training
 
 ### Data
 
@@ -74,27 +193,117 @@ The full input to our model additionally includes the target vertex $t$ after th
 ![](img/finetune.svg)
 </div>
 
-We have three datasets for each step.
+We have a dataset for each step.
+
+1. **Pre-train data**: For each $n \in [3,15]$, we generated an equal number of graphs on $n$ vertices, with $t = 2$. Each graph was created by choosing $n$ random edges. To balance the dataset, we removed some graphs with shorter paths to obtain the distribution shown below.
+
+TODO: Add axes titles, switch to bar plot
+
+$include`raw="html"` plots/train-dist.html
 
-1. **Pre-train data**: For each $n \in [3,15]$, we generated an equal number of graphs on $n$ vertices, with $t = 2$. Each graph was created by choosing $n$ random edges.
 2. **Fine-tune data**: For each $n \in [3,7]$, we generated an equal number of graphs on $n$ vertices each with a random $t$ on the shortest path from $1$ to $2$. Again, each graph was created by choosing $n$ random edges.
+
 3. **Generalization test data**: The same as the fine-tune data, except we sample $n \in [3,15]$ and $t \leq 7$.
 
 We wrote some Python code to generate the data during the training loop, but Python is excruciatingly slow and data generation wasted a lot of training time. Our solution was to pre-generate the data before training using a multithreaded version of our Python code.
 
-## Complicated explicit transformer formula for shortest paths
+### Architecture
+
+
+We used a standard transformer architecture. To ensure that it can in theory learn BFS, we ensured that the number of layers in our transformer exceeds the diameter of the input graphs.
+
+Since the order of the edges in the input doesn't matter, we did not use positional encodings. Each edge $(a,b)$ is embedded to dimension $d$ where the first $\frac{d}{2}$ elements are the learned embedding of $a$ and the last $\frac{d}{2}$ elements are the learned embedding of $b$. For the target vertex $t$, we pair it with the special token $TARGET$ and embed $(t,TARGET)$ in the same way.
+
+<!-- https://cocreate.csail.mit.edu/r/sxArTEXiAgJshznmm -->
+![](img/embeddings.svg)
+
+### Specifications
+
+For our training run, we used the following specifications:
+
+| Hyperparameter           | Value           |
+| ------------------------ | --------------- |
+| Model dimension          | $64$            |
+| Layers                   | $11$            |
+| Heads                    | $2$             |
+| Epochs                   | $100$           |
+| Batch size               | $2^{15}$        |
+| Dropout                  | $0$             |
+| Training examples        | $10^8$          |
+| Parameters               | $550433$        |
+| Learning rate            | $5\cdot10^{-4}$ |
+| Loss                     | MSE             |
+| Optimizer                | Adam            |
+
+The number of bits required to store the model parameters in float32 is around $550433 \cdot 32 \approx 1.76\cdot10^7$. The number of possible graphs on 15 vertices generated using our procedure is approximately
+$$\frac{\binom{15}{2}^{15}}{15!} \approx 1.59\cdot10^{18}.$$
+This is because there are $\binom{15}{2}$ choices for each of the 15 edges and we don't care about the order of the edges. This is only an approximation because some edges might be duplicated. Each graph has an answer between 1 and 15 which requires around 4 bits, so memorizing all the answers requires $4\cdot1.59\cdot10^{18} = 6.36\cdot10^{18}$ bits, which is $3.61\cdot10^{11}$ times larger than our model size. This implies that in order to get really low loss, our model needs to do something other than brute memorization.
+
+A single training run takes roughly eight hours to run on a Radeon 7900 XTX graphics card.
+
+One pitfall we encountered during training is that we initially used bfloat16 to save VRAM, but our huge batch size caused loss-of-precision problems and made training very difficult. It took us two weeks to debug this until we found that switching to float32 improved training significantly.
+
+### Training Results
+
+Here is our training loss curve:
+
+$include`raw="html"` plots/train-loss.html
+
+Here is a "confusion matrix"-like plot, showing for each true label, the probability distribution over classifications that the model gives.
+
+$include`raw="html"` plots/train-hist.html
+
+One pattern we consistently noticed during training is that the model often gets stuck, with the loss plateauing for many epochs before rapidly decreasing. For instance, this happened between epochs 100 and 300 in the graph above:
+
+$include`raw="html"` plots/grokking.html
+
+Such **phase change** phenomena are ubiquitous in deep learning and for simple algorithmic tasks we can sometimes interpret them [^2].
 
-TODO: Kevin or Alek
+**TODO ANT: training curves for 1, 2, 3 length paths. and add some text about why this is an interesting finding.**
+
+$include`raw="html"` plots/len-loss.html
+
+## Generalization
+
+### Fine Tuning Results
+
+After receiving our initial results, we fine-tuned with a learning rate of 1e-5, also with MSE and the same batch size. Our final results are shown in the images below.
+
+$include`raw="html"` plots/tune-loss.html
+
+$include`raw="html"` plots/tune-hist.html
+
+$include`raw="html"` plots/test-onpath-largetarget.html
+
+$include`raw="html"` plots/test-onpath-smalltarget.html
+
+$include`raw="html"` plots/test-small-any.html
+
+$include`raw="html"` plots/test-large-any.html
+
+
+
+## Conclusion
+
+In this post we've investigated solving the shortest paths problem with a transformer. 
+We've shown that this problem has a simple solution that can be implemented by a transformer. 
+We've shown that a transformer can learn to solve this problem. 
+We've shown that when the transformer learns to solve this problem it also implicitly builds good internal representations of the input data that allow for efficient fine tuning to find shortest paths between other vertices. Finally, we've seen that some off-distribution generalization does occur, and some does not! The main conceptual take-away from our work is that it's hard to predict when models will and won't generalize. 
+
+## Appendix
+
+```python
+import torch
+import torch.nn as nn
+import random
+from collections import deque
 
-```py
 # Configuration
-NVTXS = 16
+NVTXS = 8
 MAXDIST = NVTXS + 1
 AVGDEG = 2
 SEQLEN = NVTXS + 1
 HIDDENDIM = 4 * NVTXS + 2
-
-# Start indices for different sections of the input data
 START_REACH = NVTXS + 1
 START_OUT = 2 * NVTXS + 1
 START_SELF = 3 * NVTXS + 1
@@ -102,10 +311,70 @@ SRC_FLAG_IDX = START_SELF
 ANS_FLAG_IDX = 0
 NOTANS_FLAG_IDX = -1
 
-BIG = 20
-SUPABIG = 100
-MED = 10
-CURSE = 5
+# Determine device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def random_graph(device):
+    """Build one random undirected graph on vertices 1..NVTXS; returns (data, adj_list): a (SEQLEN, HIDDENDIM) embedding tensor on `device` and a list of neighbour sets indexed by vertex (index 0, the answer-token row, stays empty)."""
+    data = torch.zeros((SEQLEN, HIDDENDIM), device=device)
+    
+    # Row i is vertex i's token: set its one-hot bit in the Self section
+    for i in range(1, NVTXS + 1):
+        data[i, START_SELF - 1 + i] = 1
+
+    # Draw AVGDEG * NVTXS random endpoints in 1..NVTXS; consecutive pairs become candidate edges
+    adj_list = [set() for _ in range(SEQLEN)]
+    indices = [random.randint(1, NVTXS) for _ in range(AVGDEG * NVTXS)]
+    
+    for i in range(0, len(indices), 2):  # NOTE(review): indices[i + 1] assumes AVGDEG * NVTXS is even
+        u = indices[i]
+        v = indices[i + 1]
+        if u != v:  # self-loops are dropped; a repeated pair just rewrites the same entries
+            # Undirected edge: record it in both endpoints' rows
+            data[v, u] = 1  # columns 1..NVTXS hold the neighbour bits
+            data[u, v] = 1
+            data[v, NVTXS + u] = 1  # columns NVTXS+1..2*NVTXS (starting at START_REACH) hold the initial reach bits
+            data[u, NVTXS + v] = 1
+            adj_list[u].add(v)
+            adj_list[v].add(u)
+
+    # Row 0 is the answer token: set its answer flag, flag every other row as not-answer, and fill row 0's Reach section with ones
+    data[0, ANS_FLAG_IDX] = 1
+    data[1:, NOTANS_FLAG_IDX] = 1
+    data[0, START_REACH:START_REACH + NVTXS] = 1
+    return data, adj_list
+
+def SSSP(G):
+    """Breadth-first search over adjacency list G starting at vertex 1; returns the hop distance to vertex 2, or MAXDIST if vertex 2 is unreachable."""
+    dist = [MAXDIST for _ in G]  # MAXDIST doubles as the "not visited yet" marker
+    dist[1] = 0
+    frontier = deque([1])
+    while frontier:
+        vtx = frontier.popleft()
+        for x in G[vtx]:
+            if dist[x] == MAXDIST:
+                dist[x] = 1 + dist[vtx]
+                frontier.append(x)
+                if x == 2:  # stop early: BFS fixes a vertex's distance on first visit
+                    return dist[2]
+    return MAXDIST
+
+def mkbatch(size):
+    """Generate `size` random graphs on the module-level `device`; returns (data, labels): stacked embeddings of shape (size, SEQLEN, HIDDENDIM) and a float32 tensor holding each graph's SSSP result."""
+    graphs = []
+    distances = []
+
+    for _ in range(size):
+        data, adj_list = random_graph(device)
+        dist = SSSP(adj_list)
+        graphs.append(data)
+        distances.append(dist)
+
+    data = torch.stack(graphs)
+    labels = torch.tensor(distances, dtype=torch.float32, device=device)
+    return data, labels
+    
+BIG,SUPABIG,MED,CURSE = 12,30,7,5  # magnitudes for the hand-set attention weights (SUPABIG and BIG are written into Q and K entries in SillyTransformer); NOTE(review): MED and CURSE are used outside this view, confirm their roles
 
 class SillyTransformer(nn.Module):
     def __init__(self, device):
@@ -121,7 +390,6 @@ class SillyTransformer(nn.Module):
                 Q = nn.Parameter(torch.zeros((2, HIDDENDIM), device=device))
                 Q[0, START_REACH - 1 + head] = SUPABIG
                 Q[1, NOTANS_FLAG_IDX] = 1
-
                 K = nn.Parameter(torch.zeros((2, HIDDENDIM), device=device))
                 K[0, head] = 1
                 K[1, ANS_FLAG_IDX] = BIG
@@ -133,7 +401,6 @@ class SillyTransformer(nn.Module):
                 self.mostKs.append(K)
                 self.mostQs.append(Q)
                 self.mostVs.append(V)
-
             self.weirdKs = nn.ParameterList()
             self.weirdQs = nn.ParameterList()
             self.weirdVs = nn.ParameterList()
@@ -185,101 +452,23 @@ class SillyTransformer(nn.Module):
         canreach = src[:, 0, START_OUT:START_OUT + NVTXS]
         final_output = 1 + torch.sum(1 - canreach, dim=1)
         return final_output
-```
-
-
-
-After much deliberation, we decided the next step for us was to customize a transformer, writing it ourselves. We observed that we wished for the transformer to do similar work as a BFS. As a result, we decided to work with the following transformer, for a graph with $n$ vertices $v_1, v_2, \cdots, v_n$:
-
-\begin{array}{|c|c|c|c|c|c}
-\text{ANS} & v_{1} & v_{2} & \cdots & v_{n} & \\ \hline
-1 & 0 & 0 & \cdots & 0 & \text{ANS}\\ \hline
-\text{ANS} & \text{NBR}_{1} & \text{NBR}_{2} & \cdots & \text{NBR}_{n} & \text{NBR}\\ \hline
-\text{ANS} & \text{REACH}_{1} & \text{REACH}_{2} & \cdots & \text{REACH}_{n} & \text{REACH}\\ \hline
-\text{ANS} & \text{SELF}_{1} & \text{SELF}_{2} & \cdots & \text{SELF}_{n} & \text{SELF}\\ \hline
-V_{\text{OUT}} & NULL& NULL& NULL& NULL& \text{OUT}\\ \hline
-0 & 1 & 1 & \cdots &1 & \text{NOT}\\ \hline
-\end{array}
-
-Specifically, we see that $\text{NBR}_{i}$ is a $n \times 1$ vector detailing which of the vertices are neighboring vertex $v_i$, so the $j$th element of $v_i$ is $1$ if $v_i$ and $v_j$ are neighboring vertices, and $0$ otherwise. Additionally, $\text{SELF}_{i}$ is just the $n \times 1$ vector with the $i$th element $1$ and all other elements $0$ (e.g. the one-hot encoding of the vector). Now, at every step, the $\text{REACH}_k$ vector for all $k$ is updated based on the previous $\text{REACH}_k$ vector and $\text{NBR}_{k}$ (since all entries that are $1$ in $\text{REACH}_k\text{NBR}_{k}^T$ must be updated in the manner such that if the $(i, j)$th element of $\text{REACH}_k\text{NBR}_{k}^T$ is $1$, then $\text{REACH}_i$'s $j$th column is set to $1$. This is equivalent to adding $\text{REACH}_k$ to each integer $i$ where  $\text{NBR}_{k}$'s $i$th entry is nonzero.
-
-This iterates through all the vertices, and at the end, we may see what run we are on to update $V_{\text{OUT}}$.
-
-## Perturbing the Weights
-
-SLT folks like to think about geometry of loss landscape CITE
-So we did an experiment where we mess with the weights. 
-
-Findings: XXX
-
-
-## Our Model
-
-### Architecture
-
-We used a standard transformer architecture. To ensure that it can in theory learn BFS, we ensured that the number of layers in our transformer exceeds the diameter of the input graphs.
-
-Since the order of the edges in the input doesn't matter, we did not use positional encodings. Each edge $(a,b)$ is embedded to dimension $d$ where the first $\frac{d}{2}$ elements are the learned embedding of $a$ and the last $\frac{d}{2}$ elements are the learned embedding of $b$. For the target vertex $t$, we pair it with the special token $TARGET$ and embed $(t,TARGET)$ in the same way.
-
-<!-- https://cocreate.csail.mit.edu/r/sxArTEXiAgJshznmm -->
-![](img/embeddings.svg)
-
-### Training
-
-To match the BFS transformer as closely as possible, we used a model dimension of $64$, $11$ layers, and $2$ heads per layer, for a total of 550433 parameters. In 32-bit float precision, that corresponds to around $1.76\cdot10^6$ bits. The number of possible graphs on 15 vertices generated using our procedure is approximately
-
-$$\frac{\binom{15}{2}^{15}}{15!} = 1.59\cdot10^{18}.$$
-
-This is because there are $\binom{15}{2}$ choices for each of the 15 edges and we don't care about the order of the edges. This is only an approximation because some edges might be duplicated. Each graph has an answer between 1 and 15 which requires around 4 bits, so memorizing all the answers requires $4\cdot1.59\cdot10^{18} = 6.36\cdot10^{18}$ bits, which is $3.61\cdot10^{12}$ times larger than our model size.
-
-To train the model, we used MSE loss, the Adam optimizer, a learning rate of $3\cdot10^{-4}$, and a batch size of $2^{15}$ for one billion randomly generated graphs. A training run takes roughly eight hours to run on a Radeon 7900 XTX graphics card. Our final MSE loss was $0.000555$.
-
-TODO: use https://mpld3.github.io/index.html to make interactive plots
-
-![](training-loss.png)
-
-![](training-2d-histogram.png)
-
-One pattern we consistently noticed during training is that the model often gets stuck and plateaus for many epochs before rapidly decreasing. For instance, this happened between epochs 100 and 300 in the graph above:
-
-![](grokking.png)
-
-"grokking" hypothesis: it's memorizing all length 2 paths?
 
-TODO: cite Neel Nanda grokking modular addition
+model = SillyTransformer(device).to(device)  # instantiate the transformer with hand-set weights on the selected device
+params = sum(p.numel() for p in model.parameters())  # total scalar entries across all registered parameters
+print(f"Total number of parameters: {params}")
 
-TODO: CRAZY!!! training curves for 1, 2, 3 length paths
+def destroy_rand_weights(model):  # Perturbation helper: overwrite one randomly chosen K/Q/V parameter of `model` with standard-normal noise, in place
+    weight_lists = [model.mostKs, model.mostQs, model.mostVs, 
+                    model.weirdKs, model.weirdQs, model.weirdVs]
+    random_list = random.choice(weight_lists)  # pick one of the six parameter lists uniformly...
+    random_matrix = random.choice(random_list)  # ...then one parameter inside that list
+    random_matrix.data = torch.randn_like(random_matrix)  # assigning .data replaces the values while keeping the same Parameter object
 
-One pitfall we encountered during training is that we initially used bfloat16 to save VRAM, but our huge batch size caused loss-of-precision problems and made training very difficult. It took us two weeks to debug this until we found that switching to float32 improved training significantly.
-
-## Fine tuning results
-
-After receiving our initial results, we fine-tuned with a learning rate of 1e-5, also with MSE and the same batch size. Our final results are shown in the images below.
-
-![](fine-tuning-loss.png)
-
-![](fine-tuning-2d-histogram.png)
-
-![](test-2d-histogram.png)
-
-TODO: get new graphs
-
-It's pretty good!!!
-
-Can only generalize to target vertices from 2 to 7 since 8 through 15 didn't appear in the fine-tune data
-
-but this still means it
-
-## Conclusion
-
-however, a machine learning approach may do better in time through parallelism, although at the expense of using much more memory.
-**TODO: ALEK: this is BS. If you want a parallel algorithm for BFS, here's one https://en.wikipedia.org/wiki/Parallel_single-source_shortest_path_algorithm**
-
-just do bfs lol
-
-**Future Work**
-There are a couple of other things that we could try to learn shortest paths better and maybe see more generalization. 
-- Chain of thought
-- Train model to output a path, not just the distance. Give it partial points for outputting anything that is a legitimate path (i.e., consists of real edges) and more points for getting the distance correct. 
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)  # presumably used to re-train after destroy_rand_weights perturbs the model; confirm against the training loop
+loss_fn = nn.MSELoss()  # mean-squared error between predicted and true distances
+```
 
 ## References
+
+[^1]: Lin, Stephanie, Jacob Hilton, and Owain Evans. "TruthfulQA: Measuring How Models Mimic Human Falsehoods." arXiv preprint arXiv:2109.07958 (2021). https://arxiv.org/abs/2109.07958.
+[^2]: Nanda, Neel, Lawrence Chan, Tom Lieberum, Jess Smith, and Jacob Steinhardt. "Progress measures for grokking via mechanistic interpretability." arXiv preprint arXiv:2301.05217 (2023). https://arxiv.org/abs/2301.05217.
diff --git a/index.html b/index.html
index fd5f74f..b01beb3 100644
--- a/index.html
+++ b/index.html
@@ -4,7 +4,7 @@
   <meta charset="utf-8" />
   <meta name="generator" content="pandoc" />
   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
-  <title>Discovering Graph Algorithms Using Transformers</title>
+  <title>Off-Distribution Generalization of Transformers for Shortest Paths</title>
   <style>
     html {
       color: #1a1a1a;
@@ -229,27 +229,7 @@
     code span.va { color: #19177c; } /* Variable */
     code span.vs { color: #4070a0; } /* VerbatimString */
     code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
-    /* CSS for citations */
-    div.csl-bib-body { }
-    div.csl-entry {
-      clear: both;
-      margin-bottom: 0em;
-    }
-    .hanging-indent div.csl-entry {
-      margin-left:2em;
-      text-indent:-2em;
-    }
-    div.csl-left-margin {
-      min-width:2em;
-      float:left;
-    }
-    div.csl-right-inline {
-      margin-left:2em;
-      padding-left:1em;
-    }
-    div.csl-indent {
-      margin-left: 2em;
-    }  </style>
+  </style>
   <script defer=""
   src="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.js"></script>
   <script>document.addEventListener("DOMContentLoaded", function () {
@@ -271,83 +251,78 @@
 </head>
 <body>
 <header id="title-block-header">
-<h1 class="title">Discovering Graph Algorithms Using Transformers</h1>
+<h1 class="title">Off-Distribution Generalization of Transformers for
+Shortest Paths</h1>
 </header>
+<style>
+body {
+    /* Shrink margins */
+    max-width: 45em;
+    /* font-size: 110%; */
+}
+
+/* Make sure toolbar plots aren't cut off */
+.mpld3-figure {
+    height: 480px;
+    display: block;
+    margin: auto;
+}
+</style>
+<!--
+BTW we should run a spell checker on this at some point before submitting 
+note to self
+-->
 <!-- Guidelines: https://www.dropbox.com/scl/fi/bet8enscln8ue36kd8t17/final_project_guidelines.pdf?rlkey=knd19cnumk51ho1y9crno56ib&e=2&dl=0 -->
 <div style="text-align:center">
 <p>Anthony Wang, Alek Westover, Kevin Zhao</p>
 <p>{xy,alekw,kevinmz}@mit.edu</p>
 </div>
-<h2 id="motivation">Motivation</h2>
-<p>Transformers–the architecture that powers LLMs–can do incredible
-feats: trained on hundreds of gigabytes of raw text, they can learn to
-hold natural conversations, reason about the physical world, and write
-code. Skeptics argue that LLMs are simply memorizing their datasets
-without gaining any deeper understanding. For instance, GPT’s o1 model,
-achieving 90th percentile on Codeforces, struggles with simple but
-bizarre algorithms problems such as “find the subarray of a 2D array
-with the minimum average”. In this project, we hope to explore
-<strong>when off-distribution generalization happens in a
-transformer</strong>. Paul Christiano proposed an experiment <a
-href="https://www.alignmentforum.org/posts/BxersHYN2qcFoonwg/experimentally-evaluating-whether-honesty-generalizes?commentId=dsDA2BWpHPdgLvaXX">here</a>
-about shortest paths in a graph to investigate this, so we decided to
-become the first to implement his experiment and put transformers’
-generalization abilities to the test.</p>
-<p>LLMs are notorious for making up complete nonsense, so we also hope
-that our project can shed light on when truthfulness generalizes. It’s
-generally desirable for LLMs to output true statements. One current
-approach for ensuring this is to have a human in the loop rewarding the
-model for true outputs (e.g. RLHF). However, humans can be poor judges
-of truthfulness and have many cognitive biases and superficial
-heuristics. A further challenge is that as LLMs become more capable,
-there might not even exist experts that are good judges of whether the
-models outputs, such as difficult mathematical proofs, are truthful. For
-instance, most Task Rabbit workers would probably be hard pressed to
-evaluate whether a difficult mathematical proof produced by an LLM is
-true. The entire mathematical community has been known on occasion to <a
-href="https://en.wikipedia.org/wiki/Grunwald%E2%80%93Wang_theorem">believe
-false statements for many years</a>.</p>
-<p>One possible solution is to reward an LLM for truthful behavior on
-simple inputs, and then hope that the LLM generalizes its truthful
-behavior for more complex inputs where humans cannot provide helpful
-labels. Deep learning models can be remarkably good at off-distribution
-generalization–for instance, a model trained to transform hand drawn
-cats into images of cats might be able to handle a “cat” with three eyes
-in an intuitive way. We might hope that generalizing truthfully is
-simple, thus promoted by “Occam’s Razor”.</p>
-<h2 id="related-work">Related Work</h2>
-<p>COMMENT FROM ALEK – please remove all mentions of graph neural
-networks – that is BS: there is no actual reason why you’d ever use a
-Neural network to solve shortest paths, the point of choosing a
-synthetic task is because there is a <strong>simple ground
-truth</strong> which makes it easy to evaluate whether or not our model
-is performing correctly. We’d also hoped that the simplicity of the task
-would make it more feasible to do with a limited compute budget, but
-apparently this task was too hard for our architecture.</p>
-<p>There has been some research into the algorithmic optimization of
-GNNs and how they may solve real-world issues; however, none of the
-related work targets using generic machine learning methods to solve
-graph problems.</p>
-<ul>
-<li><p>Cappart et al. has researched more into the Combinatorial
-Optimization of GNNs and developed algorithms for related tasks, thus
-facilitating machine learning <span class="citation"
-data-cites="DBLP:journals/corr/abs-2102-09544">(<a
-href="#ref-DBLP:journals/corr/abs-2102-09544"
-role="doc-biblioref">Cappart et al. 2021</a>)</span>. Their results are
-mostly algorithmic so we develop further by trading a bit of accuracy
-for much faster computation in such tasks.</p></li>
-<li><p>Tutsoy uses a graph-theory-based approach to model the
-epidemiological characteristics of infectious diseases, such as COVID-19
-<span class="citation" data-cites="10.1109/TPAMI.2023.3256421">(<a
-href="#ref-10.1109/TPAMI.2023.3256421" role="doc-biblioref">Tutsoy
-2023</a>)</span>. We understand from his paper how GNN optimization may
-also be useful in researching novel diseases.</p></li>
-</ul>
-<h2 id="task">Task</h2>
-<p>Our synthetic task is simple: compute the distance between various
-vertices in an input graph. To test off-distribution generalization, our
-experiment has three steps.</p>
+<h2 id="introduction">Introduction</h2>
+<h3 id="motivation">Motivation</h3>
+<p>Neural networks are capable of impressive feats of off-distribution
+generalization. For instance, a program trained to convert sketches of
+cats into realistic looking pictures of cats can draw a cat with three
+eyes, if the sketch has three eyes, even if such an image never occurred
+in its training data. However, neural networks also often learn
+non-robust features that cause them to perform very poorly
+off-distribution (e.g., adversarial examples for an image classifier).
+In this project, we will investigate the question of when transformers
+generalize off-distribution via a case study on a simple
+synthetic task. More specifically, the goal of our project is to make
+progress towards answering the following question:</p>
+<blockquote>
+<p>Suppose you train a model to be truthful on task A, and to produce
+coherent answers on a related task B.<br />
+Will the model learn to be truthful on task B?</p>
+</blockquote>
+<p>Here’s a more specific version of this question, inspired by a <a
+href="https://www.alignmentforum.org/posts/BxersHYN2qcFoonwg/experimentally-evaluating-whether-honesty-generalizes?commentId=dsDA2BWpHPdgLvaXX">blog
+post written by Paul Christiano</a> (who was influential in developing
+RLHF).</p>
+<blockquote>
+<p>Suppose we trained a language model on next token prediction in both
+English and French, and then fine-tuned the model to be truthful in
+English. Would the model then learn to answer questions truthfully in
+French? It has been shown <a href="#fn1" class="footnote-ref"
+id="fnref1" role="doc-noteref"><sup>1</sup></a> that this doesn’t happen
+by default, but you could hope that the truthfulness training in English
+would generalize to French.</p>
+</blockquote>
+<p>In the future, humans are likely to trust AIs with performing
+important and complex tasks. For some of these tasks it will be
+infeasible for humans to audit the answers – either because human labor
+is too expensive, or because the AIs are too competent, so humans aren’t
+skilled enough to perform an audit (for example, an amateur chess player
+might have a hard time telling apart a good chess move and a bad chess
+move). However, there will be some simpler tasks where we can audit the
+AIs and reward truthful answers. So, it’s plausible that models trained
+to be truthful in some simple domains will learn truthfulness as a
+general lesson.</p>
+<h3 id="task">Task</h3>
+<p>In order to investigate generalization of this type in transformers,
+we decided to focus on a simple synthetic task – shortest paths in a
+graph. More specifically, we ran the following experiment (see <a
+href="#data">Data</a> for more details):</p>
 <ol type="1">
 <li><strong>Pre-train</strong> a transformer to predict the distance
 between vertices <span class="math inline">1</span> and <span
@@ -362,47 +337,289 @@ class="math inline">2</span> in graphs with <span class="math inline">n
 between vertex <span class="math inline">1</span> to <span
 class="math inline">t</span> for any <span class="math inline">t</span>
 on the shortest path from <span class="math inline">1</span> to <span
-class="math inline">2</span>, but only do fine-tuning on graphs with
-<span class="math inline">n \in [3,7]</span> vertices.</li>
+class="math inline">2</span>, but only do fine-tuning on small graphs
+with <span class="math inline">n \in [3,7]</span> vertices. Our
+intuition was that, in training a model to find distances between
+vertices <span class="math inline">1</span> and <span
+class="math inline">2</span>, the model would also implicitly need to
+learn about distances from vertex <span class="math inline">1</span> to
+other vertices on this shortest path.</li>
 </ol>
 <div style="text-align:center">
 <p><img src="img/finetune.svg" /></p>
 </div>
 <ol start="3" type="1">
-<li><strong>Test</strong> whether the transformer can accurately predict
-the distances between <span class="math inline">1</span> to <span
-class="math inline">t</span> for any <span class="math inline">t \leq
-7</span> on the shortest path from <span class="math inline">1</span> to
-<span class="math inline">2</span> for graphs with <span
-class="math inline">n \in [3,15]</span> vertices.</li>
+<li><strong>Test</strong> whether the transformer can accurately
+generalize to predicting the distance between <span
+class="math inline">1</span> and <span class="math inline">t</span> for
+any <span class="math inline">t \in [3,15]</span> on the shortest path
+from <span class="math inline">1</span> to <span
+class="math inline">2</span> for graphs with <span class="math inline">n
+\in [3,15]</span> vertices.</li>
 </ol>
 <div style="text-align:center">
 <p><img src="img/test.svg" /></p>
 </div>
-<h3 id="algorithm-for-shortest-paths">Algorithm for Shortest Paths</h3>
-<p>The standard algorithm to find the shortest path in a graph between
-vertices <span class="math inline">u</span> and <span
-class="math inline">v</span> is <strong>breadth-first search
-(BFS)</strong>, taught in every intro algorithms class. Initially, BFS
-starts at <span class="math inline">u</span>, and at each phase,
-explores a farther layer of vertices from <span
-class="math inline">u</span>. During a phase, BFS goes through every
-vertex in the current layer and adds any of their unvisited neighbors to
-the next layer. The algorithm terminates once we reach <span
-class="math inline">v</span> or if the next layer is empty. For a graph
-with <span class="math inline">V</span> vertices and <span
-class="math inline">E</span> edges, the runtime of BFS is <span
-class="math inline">O(V + E)</span>. BFS gives us an easy and fast way
-to find the ground truth answer for any graph, so that we can verify the
-accuracy of our machine learning approach.</p>
-<p>We hope that our model can learn BFS or some other simple,
-generalizable algorithm for shortest paths, because the model can’t just
-pull some magic number out of a hat but intuitively needs to count or
-list the vertices on the shortest path from <span
-class="math inline">1</span> to <span class="math inline">2</span>. In
-fact, we will show how to hand-craft a set of weights to implement BFS
-in a transformer, so it’s indeed theoretically possible for a
-transformer to achieve 100% accuracy.</p>
+<!-- ![Experiment description](traintunegeneralize.png) -->
+<p>Obviously, if you actually want to compute shortest paths in a graph,
+you should use the standard graph algorithm BFS (breadth-first search).
+Our motivation for using this problem to study generalization was
+two-fold:</p>
+<ol type="1">
+<li>It’s very simple to generate data and ground truth labels for this
+task.</li>
+<li>There is a simple circuit which computes shortest paths (exactly),
+by doing something akin to BFS. In fact, we implemented this simple
+circuit by hand in the section <a
+href="#handcrafted-bfs-transformer">Handcrafted BFS Transformer</a>. Our
+hope was that our model would also find a simple circuit that generalizes
+well.</li>
+</ol>
+<!--
+ANT:  
+RE t<7
+The model could've decided to generalize for t > 7 too, but it didn't  
+so id say 
+we tested generalization for all t in 3, 15
+it succeeded for t in 3,7
+failed for t in 8 to 15
+reason is because it could do "if statements" instead of realizing that it should just be looking at the dot product
+mmm
+i feel like the fact that it works for 3,7 and not 8,15 is kind of compelling
+anyways this  is just a comment explaining how im going to / think you should pitch the generalization section
+(it could've generalized all the way, but didn't, so what we learned is generalization sometimes happens but sometimes doesn't)
+
+yeah that's the intuition
+can't really do interpretability to figure out if that's actually what's going on
+-->
+<h3 id="findings">Findings</h3>
+<p>Our main findings were as follows:</p>
+<ol type="1">
+<li><p>There exists a simple set of weights that you can find by hand to
+achieve arbitrarily small MSE loss on shortest paths. Moreover, this set
+of weights is “robust” in the sense that, if you perturb some of these
+weights and run gradient descent, the model will recover low
+loss.</p></li>
+<li><p>Transformers can learn on their own to perform shortest paths.
+The training process consists of a sequence of <strong>phase
+changes</strong> where the loss would barely change for a large number
+of epochs, and then over a very small number of epochs, the loss would
+dramatically go down. (TODO: Is this just because we’re bad at DL? now
+it shouldn’t have as many phase changes. Or maybe just in the
+beginning?) In particular, these phase changes corresponded to times
+when the model decided to, e.g., go from dramatically over-classifying
+paths as being length 1 (resulting in great performance on tasks of
+length 1 but poor performance on other tasks) to spreading these length
+1 guesses out to other distances.</p></li>
+<li><p>A model trained to compute the distance between two specific
+vertices can be fine-tuned efficiently to compute the shortest distance
+to other vertices.</p></li>
+<li><p>A model trained to compute the distance between vertices <span
+class="math inline">1</span> and <span class="math inline">2</span> on
+large graphs, and fine-tuned to compute the distance between vertex 1
+and any vertex (on a shortest <span class="math inline">1</span>-<span
+class="math inline">2</span> path) in small graphs, has the following
+generalization properties:</p>
+<ul>
+<li>In larger graphs, the model can still compute the shortest distance
+between <span class="math inline">1</span> and any <span
+class="math inline">t</span> whose label existed in the smaller graphs,
+namely <span class="math inline">t \in [3,7]</span>.</li>
+<li>However, the model fails when the target vertex doesn’t exist in
+smaller graphs.</li>
+</ul></li>
+</ol>
+<p>The main conceptual implication of this work is that, while
+off-distribution generalization sometimes does occur, sometimes it also
+doesn’t, and it seems tricky to predict when off-distribution
+generalization will happen.</p>
+<h3 id="post-outline">Post Outline</h3>
+<p>The remainder of our post is structured as follows:</p>
+<ol type="1">
+<li><p><a href="#handcrafted-bfs-transformer">Handcrafted BFS
+Transformer</a>: To start, we explicitly construct by hand weights for a
+transformer to solve the shortest paths problem. We also investigate the
+“robustness” of our construction by showing that, if we perturb the
+weights of our constructed solution, gradient descent can recover low
+loss. This gives a proof-of-concept that a
+transformer could encode a simple generalizable solution to the problem,
+and gives some reason to believe that a transformer could learn such a
+solution.</p></li>
+<li><p><a href="#training">Training</a>: Next, we discuss the training.
+First we describe the exact data generation process and model
+architecture. Then we discuss hyperparameter settings and optimization
+choices that were important to get the model to train efficiently.
+Finally, we investigate some interesting phenomena that occurred during
+training, such as phase transitions in loss for different path
+lengths.</p></li>
+<li><p><a href="#generalization">Generalization</a>: Finally we discuss
+the phenomenon that we wanted to study – generalization! We show that
+some generalization does occur, but other generalization doesn’t. We
+explain why we think this happens.</p></li>
+</ol>
+<h2 id="handcrafted-bfs-transformer">Handcrafted BFS Transformer</h2>
+<p>In this section we describe an explicit set of weights, made (with
+love) by hand, that result in a transformer that achieves MSE less than
+<span class="math inline">10^{-6}</span> for the shortest paths problem.
+We’ll start by describing the (somewhat involved) constructions, and
+then investigate the geometry of the loss landscape near our explicit
+solution. The main reason for doing this is to demonstrate the existence
+of a simple transformer computing shortest paths, to give
+proof-of-concept that the transformer could learn a generalizable
+solution.</p>
+<p>The construction is somewhat tricky to write down, so we’ll make
+several (non-essential) simplifying assumptions:</p>
+<ol type="1">
+<li>We only care about finding the distance between vertex 1 and vertex
+2.</li>
+<li>We are given a nice embedding of the graph.</li>
+</ol>
+<p>It will be obvious from the construction how to get rid of
+assumptions (1) and (2).</p>
+<p>The embedding of the graph that we will work with here will be
+different from the embeddings that we use in the rest of the post. In
+particular, in this section only, the tokens will correspond to vertices.
+In the remainder of the post, tokens will correspond to edges.</p>
+<p>More specifically, we will create the following embeddings for our
+input sequence:</p>
+<table>
+<colgroup>
+<col style="width: 25%" />
+<col style="width: 20%" />
+<col style="width: 22%" />
+<col style="width: 13%" />
+<col style="width: 18%" />
+</colgroup>
+<thead>
+<tr class="header">
+<th></th>
+<th>Answer token</th>
+<th><span class="math inline">v_{1}</span> token</th>
+<th><span class="math inline">\ldots</span></th>
+<th><span class="math inline">v_n</span> token</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Answer flag</td>
+<td>1</td>
+<td>0</td>
+<td></td>
+<td>0</td>
+</tr>
+<tr class="even">
+<td>Neighbors</td>
+<td></td>
+<td>01101</td>
+<td></td>
+<td>10010</td>
+</tr>
+<tr class="odd">
+<td>Reach</td>
+<td></td>
+<td>01101</td>
+<td></td>
+<td>10010</td>
+</tr>
+<tr class="even">
+<td>Out</td>
+<td>00000</td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr class="odd">
+<td>Self</td>
+<td></td>
+<td>10000</td>
+<td></td>
+<td>00001</td>
+</tr>
+<tr class="even">
+<td>Not-answer flag</td>
+<td>0</td>
+<td>1</td>
+<td></td>
+<td>1</td>
+</tr>
+</tbody>
+</table>
+<p>As you can see, the hidden dimension of the model will be <span
+class="math inline">4n+2</span>. The hidden dimension is broken up into
+5 sections, some of which are only used in the answer token, and some of
+which are only used in the vertex tokens. Here are what the different
+parts of the hidden dimension represent:</p>
+<ol type="1">
+<li>Flags: booleans indicating whether a vertex is the answer token or
+not. If desired, you can think of this as part of a “positional
+embedding”. The flags will not be updated as we move through the
+transformer.</li>
+<li>Neighbors: a boolean vector (of dimension <span
+class="math inline">n</span>) describing the neighbors of each vertex.
+(We just put zeros here for the answer token). This also won’t update
+through the course of running the transformer.</li>
+<li>Reach: a boolean vector representing the vertices that each vertex
+can reach. This will update over the course of a forward pass of the
+transformer. Specifically, at layer <span class="math inline">i</span>
+of the transformer we will extend the reach of each vertex from vertices
+which are distance <span class="math inline">i+1</span> away, to
+vertices at distance <span class="math inline">i+2</span> away.</li>
+<li>Out: this starts as zero for all tokens. Once vertex <span
+class="math inline">1</span> can reach vertex <span
+class="math inline">2</span> (i.e., we are in a layer deep enough that
+the reach vector for <span class="math inline">v_{1}</span>’s token
+indicates that it can reach vertex <span class="math inline">2</span>)
+we’ll start writing <span class="math inline">1</span> on the answer
+token’s out part of its embedding.</li>
+<li>Self: a 1-hot encoding specifying which vertex each token
+corresponds to. You should think of this as a positional embedding
+(along with the flags).</li>
+</ol>
+<p>The transformer will consist of <span class="math inline">n</span>
+layers, each of which has <span class="math inline">n+1</span> heads.
+We’ve included the code for the handcoded transformer in the <a
+href="#appendix">Appendix</a> – possibly it’s helpful to look at the
+code in conjunction with the written explanation below.</p>
+<p>In layer <span class="math inline">i</span>, our goal is to extend
+the reach of each vertex <span class="math inline">v</span> from the set
+of vertices at distance <span class="math inline">i+1</span> from <span
+class="math inline">v</span> to the set of vertices at distance <span
+class="math inline">i+2</span> from <span class="math inline">v</span>.
+The <span class="math inline">j</span>-th head will perform the
+following operation:</p>
+<pre><code>In head j at layer i:
+Vertex v&#39;s query vector will have high dot-product with vertex w&#39;s key vector
+iff vertex v can reach vertex j, and vertex w is neighbors with vertex j 
+
+The value that vertex w would give to vertex v is simply a 1-hot vector, with a 1 in the w-th position</code></pre>
+<p>After running the heads, each vertex’s embedding is updated to
+integrate the new vertices that it can reach into its embedding. Note
+that we can think of embeddings as being updated because transformers
+have a residual stream of embeddings that we make modifications to.</p>
+<p>Finally, the last head will be in charge of noticing whether vertex 1
+has reached vertex 2 yet, and if so recording this fact. For the full
+details, see <a href="#appendix">appendix</a>.</p>
+<h3 id="robustness">Robustness</h3>
+<p>The field of Singular Learning Theory (SLT; see Liam Carroll’s
+Master’s thesis “Phase Transitions in Neural Networks” for an
+introduction) aims to understand model training and loss-landscape
+geometry. In efforts to better understand the loss landscape of the
+shortest paths loss function according to the tokens used in our hand
+coded implementation of the shortest paths transformers, we decided to
+start at a good setting of the parameters, and then perturb the weights,
+and see if the model can subsequently achieve low loss. The intuition
+for why this is a good way of measuring “how attractive of a loss
+basin” we have is that this experiment is similar to the Local Learning
+Coefficient from SLT (see Lau, Edmund, Zach Furman, George Wang, Daniel
+Murfet, and Susan Wei. “The Local Learning Coefficient: A
+Singularity-Aware Complexity Measure”). We found that perturbing the
+weights led to high loss, but gradient descent was able to recover low
+loss, indicating that the solution is somewhat “findable” by gradient
+descent.</p>
+<p>TODO: Switch to fancy mpld3 plots <img src="perturb.png"
+alt="perturb.png" /> <img src="perturb-loss.png"
+alt="perturb-loss.png" /></p>
+<h2 id="training">Training</h2>
 <h3 id="data">Data</h3>
 <p>We’ll represent an <span class="math inline">n</span> vertex, <span
 class="math inline">m</span> edge unweighted, undirected graph as
@@ -425,189 +642,76 @@ the input <span class="math inline">[1, 3, 3, 4, 2, 4, 2, 3, 0, 0, 0, 0,
 <div style="text-align:center">
 <p><img src="img/finetune.svg" /></p>
 </div>
-<p>We have three datasets for each step.</p>
+<p>We have a dataset for each step.</p>
 <ol type="1">
 <li><strong>Pre-train data</strong>: For each <span
 class="math inline">n \in [3,15]</span>, we generated an equal number of
 graphs on <span class="math inline">n</span> vertices, with <span
 class="math inline">t = 2</span>. Each graph was created by choosing
-<span class="math inline">n</span> random edges.</li>
-<li><strong>Fine-tune data</strong>: For each <span
+<span class="math inline">n</span> random edges. To balance the dataset,
+we removed some graphs with shorter paths to obtain the distribution
+shown below.</li>
+</ol>
+<p>TODO: Add axes titles, switch to bar plot</p>
+
+
+<style>
+
+</style>
+
+<div id="fig_el1200021399424053838726786165297"></div>
+<script>
+function mpld3_load_lib(url, callback){
+  var s = document.createElement('script');
+  s.src = url;
+  s.async = true;
+  s.onreadystatechange = s.onload = callback;
+  s.onerror = function(){console.warn("failed to load library " + url);};
+  document.getElementsByTagName("head")[0].appendChild(s);
+}
+
+if(typeof(mpld3) !== "undefined" && mpld3._mpld3IsLoaded){
+   // already loaded: just create the figure
+   !function(mpld3){
+       
+       mpld3.draw_figure("fig_el1200021399424053838726786165297", {"width": 640.0, "height": 480.0, "axes": [{"bbox": [0.125, 0.10999999999999999, 0.775, 0.77], "xlim": [0.29999999999999993, 15.7], "ylim": [0.0, 7533.75], "xdomain": [0.29999999999999993, 15.7], "ydomain": [0.0, 7533.75], "xscale": "linear", "yscale": "linear", "axes": [{"position": "bottom", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}, {"position": "left", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}], "axesbg": "#FFFFFF", "axesbgalpha": null, "zoomable": true, "id": "el120002139942361034016", "lines": [], "paths": [{"data": "data01", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126032", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data02", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361031376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data03", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942360791184", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data04", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361121472", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data05", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361120896", "dasharray": "none", 
"alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data06", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123824", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data07", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123392", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data08", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127904", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data09", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129296", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data10", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129056", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data11", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361128624", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 2, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126752", "dasharray": 
"none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 3, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126320", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data13", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129872", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}], "markers": [], "texts": [], "collections": [], "images": [], "sharex": [], "sharey": []}], "data": {"data01": [[1.0, 0.0], [1.933333396911621, 0.0], [1.933333396911621, 7175.0], [1.0, 7175.0]], "data02": [[1.933333396911621, 0.0], [2.866666793823242, 0.0], [2.866666793823242, 6576.0], [1.933333396911621, 6576.0]], "data03": [[2.866666793823242, 0.0], [3.799999952316284, 0.0], [3.799999952316284, 4624.0], [2.866666793823242, 4624.0]], "data04": [[3.799999952316284, 0.0], [4.733333587646484, 0.0], [4.733333587646484, 3021.0], [3.799999952316284, 3021.0]], "data05": [[4.733333587646484, 0.0], [5.666666507720947, 0.0], [5.666666507720947, 2275.0], [4.733333587646484, 2275.0]], "data06": [[5.666666507720947, 0.0], [6.599999904632568, 0.0], [6.599999904632568, 1653.0], [5.666666507720947, 1653.0]], "data07": [[6.599999904632568, 0.0], [7.5333333015441895, 0.0], [7.5333333015441895, 492.0], [6.599999904632568, 492.0]], "data08": [[7.5333333015441895, 0.0], [8.466667175292969, 0.0], [8.466667175292969, 119.0], [7.5333333015441895, 119.0]], "data09": [[8.466667175292969, 0.0], [9.399999618530273, 0.0], [9.399999618530273, 28.0], [8.466667175292969, 28.0]], "data10": [[9.399999618530273, 0.0], [10.333333015441895, 0.0], [10.333333015441895, 5.0], [9.399999618530273, 5.0]], "data11": [[10.333333015441895, 0.0], [11.266666412353516, 0.0], [11.266666412353516, 1.0], [10.333333015441895, 
1.0]], "data12": [[11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758]], "data13": [[14.066666603088379, 0.0], [15.0, 0.0], [15.0, 6799.0], [14.066666603088379, 6799.0]]}, "id": "el120002139942405383872", "plugins": [{"type": "reset"}, {"type": "zoom", "button": true, "enabled": false}, {"type": "boxzoom", "button": true, "enabled": false}]});
+   }(mpld3);
+}else if(typeof define === "function" && define.amd){
+   // require.js is available: use it to load d3/mpld3
+   require.config({paths: {d3: "https://d3js.org/d3.v5"}});
+   require(["d3"], function(d3){
+      window.d3 = d3;
+      mpld3_load_lib("https://mpld3.github.io/js/mpld3.v0.5.10.js", function(){
+         
+         mpld3.draw_figure("fig_el1200021399424053838726786165297", {"width": 640.0, "height": 480.0, "axes": [{"bbox": [0.125, 0.10999999999999999, 0.775, 0.77], "xlim": [0.29999999999999993, 15.7], "ylim": [0.0, 7533.75], "xdomain": [0.29999999999999993, 15.7], "ydomain": [0.0, 7533.75], "xscale": "linear", "yscale": "linear", "axes": [{"position": "bottom", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}, {"position": "left", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}], "axesbg": "#FFFFFF", "axesbgalpha": null, "zoomable": true, "id": "el120002139942361034016", "lines": [], "paths": [{"data": "data01", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126032", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data02", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361031376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data03", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942360791184", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data04", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361121472", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data05", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361120896", "dasharray": "none", 
"alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data06", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123824", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data07", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123392", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data08", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127904", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data09", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129296", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data10", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129056", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data11", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361128624", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 2, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126752", "dasharray": 
"none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 3, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126320", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data13", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129872", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}], "markers": [], "texts": [], "collections": [], "images": [], "sharex": [], "sharey": []}], "data": {"data01": [[1.0, 0.0], [1.933333396911621, 0.0], [1.933333396911621, 7175.0], [1.0, 7175.0]], "data02": [[1.933333396911621, 0.0], [2.866666793823242, 0.0], [2.866666793823242, 6576.0], [1.933333396911621, 6576.0]], "data03": [[2.866666793823242, 0.0], [3.799999952316284, 0.0], [3.799999952316284, 4624.0], [2.866666793823242, 4624.0]], "data04": [[3.799999952316284, 0.0], [4.733333587646484, 0.0], [4.733333587646484, 3021.0], [3.799999952316284, 3021.0]], "data05": [[4.733333587646484, 0.0], [5.666666507720947, 0.0], [5.666666507720947, 2275.0], [4.733333587646484, 2275.0]], "data06": [[5.666666507720947, 0.0], [6.599999904632568, 0.0], [6.599999904632568, 1653.0], [5.666666507720947, 1653.0]], "data07": [[6.599999904632568, 0.0], [7.5333333015441895, 0.0], [7.5333333015441895, 492.0], [6.599999904632568, 492.0]], "data08": [[7.5333333015441895, 0.0], [8.466667175292969, 0.0], [8.466667175292969, 119.0], [7.5333333015441895, 119.0]], "data09": [[8.466667175292969, 0.0], [9.399999618530273, 0.0], [9.399999618530273, 28.0], [8.466667175292969, 28.0]], "data10": [[9.399999618530273, 0.0], [10.333333015441895, 0.0], [10.333333015441895, 5.0], [9.399999618530273, 5.0]], "data11": [[10.333333015441895, 0.0], [11.266666412353516, 0.0], [11.266666412353516, 1.0], [10.333333015441895, 
1.0]], "data12": [[11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758]], "data13": [[14.066666603088379, 0.0], [15.0, 0.0], [15.0, 6799.0], [14.066666603088379, 6799.0]]}, "id": "el120002139942405383872", "plugins": [{"type": "reset"}, {"type": "zoom", "button": true, "enabled": false}, {"type": "boxzoom", "button": true, "enabled": false}]});
+      });
+    });
+}else{
+    // require.js not available: dynamically load d3 & mpld3
+    mpld3_load_lib("https://d3js.org/d3.v5.js", function(){
+         mpld3_load_lib("https://mpld3.github.io/js/mpld3.v0.5.10.js", function(){
+                 
+                 mpld3.draw_figure("fig_el1200021399424053838726786165297", {"width": 640.0, "height": 480.0, "axes": [{"bbox": [0.125, 0.10999999999999999, 0.775, 0.77], "xlim": [0.29999999999999993, 15.7], "ylim": [0.0, 7533.75], "xdomain": [0.29999999999999993, 15.7], "ydomain": [0.0, 7533.75], "xscale": "linear", "yscale": "linear", "axes": [{"position": "bottom", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}, {"position": "left", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}], "axesbg": "#FFFFFF", "axesbgalpha": null, "zoomable": true, "id": "el120002139942361034016", "lines": [], "paths": [{"data": "data01", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126032", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data02", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361031376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data03", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942360791184", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data04", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361121472", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data05", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361120896", "dasharray": 
"none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data06", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123824", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data07", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123392", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data08", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127904", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data09", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129296", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data10", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129056", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data11", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361128624", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 2, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126752", 
"dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 3, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126320", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data13", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129872", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}], "markers": [], "texts": [], "collections": [], "images": [], "sharex": [], "sharey": []}], "data": {"data01": [[1.0, 0.0], [1.933333396911621, 0.0], [1.933333396911621, 7175.0], [1.0, 7175.0]], "data02": [[1.933333396911621, 0.0], [2.866666793823242, 0.0], [2.866666793823242, 6576.0], [1.933333396911621, 6576.0]], "data03": [[2.866666793823242, 0.0], [3.799999952316284, 0.0], [3.799999952316284, 4624.0], [2.866666793823242, 4624.0]], "data04": [[3.799999952316284, 0.0], [4.733333587646484, 0.0], [4.733333587646484, 3021.0], [3.799999952316284, 3021.0]], "data05": [[4.733333587646484, 0.0], [5.666666507720947, 0.0], [5.666666507720947, 2275.0], [4.733333587646484, 2275.0]], "data06": [[5.666666507720947, 0.0], [6.599999904632568, 0.0], [6.599999904632568, 1653.0], [5.666666507720947, 1653.0]], "data07": [[6.599999904632568, 0.0], [7.5333333015441895, 0.0], [7.5333333015441895, 492.0], [6.599999904632568, 492.0]], "data08": [[7.5333333015441895, 0.0], [8.466667175292969, 0.0], [8.466667175292969, 119.0], [7.5333333015441895, 119.0]], "data09": [[8.466667175292969, 0.0], [9.399999618530273, 0.0], [9.399999618530273, 28.0], [8.466667175292969, 28.0]], "data10": [[9.399999618530273, 0.0], [10.333333015441895, 0.0], [10.333333015441895, 5.0], [9.399999618530273, 5.0]], "data11": [[10.333333015441895, 0.0], [11.266666412353516, 0.0], [11.266666412353516, 1.0], 
[10.333333015441895, 1.0]], "data12": [[11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758]], "data13": [[14.066666603088379, 0.0], [15.0, 0.0], [15.0, 6799.0], [14.066666603088379, 6799.0]]}, "id": "el120002139942405383872", "plugins": [{"type": "reset"}, {"type": "zoom", "button": true, "enabled": false}, {"type": "boxzoom", "button": true, "enabled": false}]});
+            })
+         });
+}
+</script>
+<ol start="2" type="1">
+<li><p><strong>Fine-tune data</strong>: For each <span
 class="math inline">n \in [3,7]</span>, we generated an equal number of
 graphs on <span class="math inline">n</span> vertices each with a random
 <span class="math inline">t</span> on the shortest path from <span
 class="math inline">1</span> to <span class="math inline">2</span>.
 Again, each graph was created by choosing <span
-class="math inline">n</span> random edges.</li>
-<li><strong>Generalization test data</strong>: The same as the fine-tune
-data, except we sample <span class="math inline">n \in [3,15]</span> and
-<span class="math inline">t \leq 7</span>.</li>
+class="math inline">n</span> random edges.</p></li>
+<li><p><strong>Generalization test data</strong>: The same as the
+fine-tune data, except we sample <span class="math inline">n \in
+[3,15]</span> and <span class="math inline">t \leq 7</span>.</p></li>
 </ol>
 <p>We wrote some Python code to generate the data during the training
 loop, but Python is excruciatingly slow and data generation wasted a lot
 of training time. Our solution was to pre-generate the data before
 training using a multithreaded version of our Python code.</p>
-<h2
-id="complicated-explicit-transformer-formula-for-shortest-paths">Complicated
-explicit transformer formula for shortest paths</h2>
-<p>TODO: Kevin or Alek</p>
-<div class="sourceCode" id="cb1"><pre class="sourceCode py"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Configuration</span></span>
-<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a>NVTXS <span class="op">=</span> <span class="dv">16</span></span>
-<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a>MAXDIST <span class="op">=</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
-<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>AVGDEG <span class="op">=</span> <span class="dv">2</span></span>
-<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>SEQLEN <span class="op">=</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
-<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a>HIDDENDIM <span class="op">=</span> <span class="dv">4</span> <span class="op">*</span> NVTXS <span class="op">+</span> <span class="dv">2</span></span>
-<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Start indices for different sections of the input data</span></span>
-<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>START_REACH <span class="op">=</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
-<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>START_OUT <span class="op">=</span> <span class="dv">2</span> <span class="op">*</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
-<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>START_SELF <span class="op">=</span> <span class="dv">3</span> <span class="op">*</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
-<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>SRC_FLAG_IDX <span class="op">=</span> START_SELF</span>
-<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>ANS_FLAG_IDX <span class="op">=</span> <span class="dv">0</span></span>
-<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a>NOTANS_FLAG_IDX <span class="op">=</span> <span class="op">-</span><span class="dv">1</span></span>
-<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a>BIG <span class="op">=</span> <span class="dv">20</span></span>
-<span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a>SUPABIG <span class="op">=</span> <span class="dv">100</span></span>
-<span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a>MED <span class="op">=</span> <span class="dv">10</span></span>
-<span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a>CURSE <span class="op">=</span> <span class="dv">5</span></span>
-<span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> SillyTransformer(nn.Module):</span>
-<span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a>    <span class="kw">def</span> <span class="fu">__init__</span>(<span class="va">self</span>, device):</span>
-<span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a>        <span class="bu">super</span>().<span class="fu">__init__</span>()</span>
-<span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a>        <span class="va">self</span>.device <span class="op">=</span> device</span>
-<span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a>        <span class="cf">with</span> torch.no_grad():</span>
-<span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a>            <span class="co"># Initialize weight parameters with specific configurations</span></span>
-<span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.mostKs <span class="op">=</span> nn.ParameterList()</span>
-<span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.mostQs <span class="op">=</span> nn.ParameterList()</span>
-<span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.mostVs <span class="op">=</span> nn.ParameterList()</span>
-<span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a>            <span class="cf">for</span> head <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">1</span>, NVTXS <span class="op">+</span> <span class="dv">1</span>):</span>
-<span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a>                Q <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">2</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
-<span id="cb1-33"><a href="#cb1-33" aria-hidden="true" tabindex="-1"></a>                Q[<span class="dv">0</span>, START_REACH <span class="op">-</span> <span class="dv">1</span> <span class="op">+</span> head] <span class="op">=</span> SUPABIG</span>
-<span id="cb1-34"><a href="#cb1-34" aria-hidden="true" tabindex="-1"></a>                Q[<span class="dv">1</span>, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
-<span id="cb1-35"><a href="#cb1-35" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-36"><a href="#cb1-36" aria-hidden="true" tabindex="-1"></a>                K <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">2</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
-<span id="cb1-37"><a href="#cb1-37" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">0</span>, head] <span class="op">=</span> <span class="dv">1</span></span>
-<span id="cb1-38"><a href="#cb1-38" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, ANS_FLAG_IDX] <span class="op">=</span> BIG</span>
-<span id="cb1-39"><a href="#cb1-39" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-40"><a href="#cb1-40" aria-hidden="true" tabindex="-1"></a>                V <span class="op">=</span> nn.Parameter(torch.zeros((NVTXS, HIDDENDIM), device<span class="op">=</span>device))</span>
-<span id="cb1-41"><a href="#cb1-41" aria-hidden="true" tabindex="-1"></a>                <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(NVTXS):</span>
-<span id="cb1-42"><a href="#cb1-42" aria-hidden="true" tabindex="-1"></a>                    V[i, START_SELF <span class="op">+</span> i] <span class="op">=</span> <span class="dv">1</span></span>
-<span id="cb1-43"><a href="#cb1-43" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-44"><a href="#cb1-44" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.mostKs.append(K)</span>
-<span id="cb1-45"><a href="#cb1-45" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.mostQs.append(Q)</span>
-<span id="cb1-46"><a href="#cb1-46" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.mostVs.append(V)</span>
-<span id="cb1-47"><a href="#cb1-47" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-48"><a href="#cb1-48" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.weirdKs <span class="op">=</span> nn.ParameterList()</span>
-<span id="cb1-49"><a href="#cb1-49" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.weirdQs <span class="op">=</span> nn.ParameterList()</span>
-<span id="cb1-50"><a href="#cb1-50" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.weirdVs <span class="op">=</span> nn.ParameterList()</span>
-<span id="cb1-51"><a href="#cb1-51" aria-hidden="true" tabindex="-1"></a>            <span class="cf">for</span> layer <span class="kw">in</span> <span class="bu">range</span>(NVTXS):</span>
-<span id="cb1-52"><a href="#cb1-52" aria-hidden="true" tabindex="-1"></a>                K <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">3</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
-<span id="cb1-53"><a href="#cb1-53" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">0</span>, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="op">-</span>BIG</span>
-<span id="cb1-54"><a href="#cb1-54" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">0</span>, SRC_FLAG_IDX] <span class="op">=</span> BIG<span class="op">+</span>SUPABIG</span>
-<span id="cb1-55"><a href="#cb1-55" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="op">-</span>SUPABIG</span>
-<span id="cb1-56"><a href="#cb1-56" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, NVTXS <span class="op">+</span> <span class="dv">2</span>] <span class="op">=</span> BIG<span class="op">+</span>SUPABIG</span>
-<span id="cb1-57"><a href="#cb1-57" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, ANS_FLAG_IDX] <span class="op">=</span> <span class="op">-</span>BIG<span class="op">-</span>SUPABIG</span>
-<span id="cb1-58"><a href="#cb1-58" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">2</span>, ANS_FLAG_IDX] <span class="op">=</span> MED</span>
-<span id="cb1-59"><a href="#cb1-59" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-60"><a href="#cb1-60" aria-hidden="true" tabindex="-1"></a>                Q <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">3</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
-<span id="cb1-61"><a href="#cb1-61" aria-hidden="true" tabindex="-1"></a>                Q[:, ANS_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
-<span id="cb1-62"><a href="#cb1-62" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-63"><a href="#cb1-63" aria-hidden="true" tabindex="-1"></a>                V <span class="op">=</span> nn.Parameter(torch.zeros((NVTXS, HIDDENDIM), device<span class="op">=</span>device))</span>
-<span id="cb1-64"><a href="#cb1-64" aria-hidden="true" tabindex="-1"></a>                V[layer, SRC_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
-<span id="cb1-65"><a href="#cb1-65" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-66"><a href="#cb1-66" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.weirdKs.append(K)</span>
-<span id="cb1-67"><a href="#cb1-67" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.weirdQs.append(Q)</span>
-<span id="cb1-68"><a href="#cb1-68" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.weirdVs.append(V)</span>
-<span id="cb1-69"><a href="#cb1-69" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-70"><a href="#cb1-70" aria-hidden="true" tabindex="-1"></a>    <span class="kw">def</span> forward(<span class="va">self</span>, src):</span>
-<span id="cb1-71"><a href="#cb1-71" aria-hidden="true" tabindex="-1"></a>        <span class="cf">for</span> layer <span class="kw">in</span> <span class="bu">range</span>(NVTXS):</span>
-<span id="cb1-72"><a href="#cb1-72" aria-hidden="true" tabindex="-1"></a>            allKs <span class="op">=</span> [<span class="va">self</span>.weirdKs[layer]] <span class="op">+</span> [x <span class="cf">for</span> x <span class="kw">in</span> <span class="va">self</span>.mostKs]</span>
-<span id="cb1-73"><a href="#cb1-73" aria-hidden="true" tabindex="-1"></a>            allQs <span class="op">=</span> [<span class="va">self</span>.weirdQs[layer]] <span class="op">+</span> [x <span class="cf">for</span> x <span class="kw">in</span> <span class="va">self</span>.mostQs]</span>
-<span id="cb1-74"><a href="#cb1-74" aria-hidden="true" tabindex="-1"></a>            allVs <span class="op">=</span> [<span class="va">self</span>.weirdVs[layer]] <span class="op">+</span> [x <span class="cf">for</span> x <span class="kw">in</span> <span class="va">self</span>.mostVs]</span>
-<span id="cb1-75"><a href="#cb1-75" aria-hidden="true" tabindex="-1"></a>            head_outputs <span class="op">=</span> []</span>
-<span id="cb1-76"><a href="#cb1-76" aria-hidden="true" tabindex="-1"></a>            </span>
-<span id="cb1-77"><a href="#cb1-77" aria-hidden="true" tabindex="-1"></a>            <span class="cf">for</span> (K, Q, V) <span class="kw">in</span> <span class="bu">zip</span>(allKs, allQs, allVs):</span>
-<span id="cb1-78"><a href="#cb1-78" aria-hidden="true" tabindex="-1"></a>                ksrc <span class="op">=</span> torch.matmul(src, K.unsqueeze(<span class="dv">0</span>).transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
-<span id="cb1-79"><a href="#cb1-79" aria-hidden="true" tabindex="-1"></a>                qsrc <span class="op">=</span> torch.matmul(src, Q.unsqueeze(<span class="dv">0</span>).transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
-<span id="cb1-80"><a href="#cb1-80" aria-hidden="true" tabindex="-1"></a>                vsrc <span class="op">=</span> torch.matmul(src, V.unsqueeze(<span class="dv">0</span>).transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
-<span id="cb1-81"><a href="#cb1-81" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-82"><a href="#cb1-82" aria-hidden="true" tabindex="-1"></a>                scores <span class="op">=</span> torch.matmul(qsrc, ksrc.transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
-<span id="cb1-83"><a href="#cb1-83" aria-hidden="true" tabindex="-1"></a>                attention_weights <span class="op">=</span> torch.softmax(scores, dim<span class="op">=-</span><span class="dv">1</span>)</span>
-<span id="cb1-84"><a href="#cb1-84" aria-hidden="true" tabindex="-1"></a>                head_output <span class="op">=</span> torch.matmul(attention_weights, vsrc)</span>
-<span id="cb1-85"><a href="#cb1-85" aria-hidden="true" tabindex="-1"></a>                head_outputs.append(head_output)</span>
-<span id="cb1-86"><a href="#cb1-86" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-87"><a href="#cb1-87" aria-hidden="true" tabindex="-1"></a>            new_reaches <span class="op">=</span> <span class="bu">sum</span>(head_outputs[<span class="dv">1</span>:])</span>
-<span id="cb1-88"><a href="#cb1-88" aria-hidden="true" tabindex="-1"></a>            BSZ <span class="op">=</span> new_reaches.shape[<span class="dv">0</span>]</span>
-<span id="cb1-89"><a href="#cb1-89" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-90"><a href="#cb1-90" aria-hidden="true" tabindex="-1"></a>            nodelta_nbrs <span class="op">=</span> torch.zeros((BSZ, SEQLEN, NVTXS <span class="op">+</span> <span class="dv">1</span>), device<span class="op">=</span><span class="va">self</span>.device)</span>
-<span id="cb1-91"><a href="#cb1-91" aria-hidden="true" tabindex="-1"></a>            morepadlol <span class="op">=</span> torch.zeros((BSZ, SEQLEN, <span class="dv">1</span> <span class="op">+</span> NVTXS), device<span class="op">=</span><span class="va">self</span>.device)</span>
-<span id="cb1-92"><a href="#cb1-92" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-93"><a href="#cb1-93" aria-hidden="true" tabindex="-1"></a>            src <span class="op">=</span> src <span class="op">+</span> torch.cat((nodelta_nbrs, new_reaches, head_outputs[<span class="dv">0</span>], morepadlol), dim<span class="op">=</span><span class="dv">2</span>)</span>
-<span id="cb1-94"><a href="#cb1-94" aria-hidden="true" tabindex="-1"></a>            src[:, :, START_REACH:START_REACH <span class="op">+</span> NVTXS] <span class="op">=</span> <span class="dv">2</span> <span class="op">*</span> torch.sigmoid(src[:, :, START_REACH:START_REACH <span class="op">+</span> NVTXS] <span class="op">*</span> CURSE) <span class="op">-</span> <span class="dv">1</span></span>
-<span id="cb1-95"><a href="#cb1-95" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb1-96"><a href="#cb1-96" aria-hidden="true" tabindex="-1"></a>        canreach <span class="op">=</span> src[:, <span class="dv">0</span>, START_OUT:START_OUT <span class="op">+</span> NVTXS]</span>
-<span id="cb1-97"><a href="#cb1-97" aria-hidden="true" tabindex="-1"></a>        final_output <span class="op">=</span> <span class="dv">1</span> <span class="op">+</span> torch.<span class="bu">sum</span>(<span class="dv">1</span> <span class="op">-</span> canreach, dim<span class="op">=</span><span class="dv">1</span>)</span>
-<span id="cb1-98"><a href="#cb1-98" aria-hidden="true" tabindex="-1"></a>        <span class="cf">return</span> final_output</span></code></pre></div>
-<p>After much deliberation, we decided the next step for us was to
-customize a transformer, writing it ourselves. We observed that we
-wished for the transformer to do similar work as a BFS. As a result, we
-decided to work with the following transformer, for a graph with <span
-class="math inline">n</span> vertices <span class="math inline">v_1,
-v_2, \cdots, v_n</span>:</p>
-<span class="math display">\begin{array}{|c|c|c|c|c|c}
-\text{ANS} &amp; v_{1} &amp; v_{2} &amp; \cdots &amp; v_{n} &amp; \\
-\hline
-1 &amp; 0 &amp; 0 &amp; \cdots &amp; 0 &amp; \text{ANS}\\ \hline
-\text{ANS} &amp; \text{NBR}_{1} &amp; \text{NBR}_{2} &amp; \cdots &amp;
-\text{NBR}_{n} &amp; \text{NBR}\\ \hline
-\text{ANS} &amp; \text{REACH}_{1} &amp; \text{REACH}_{2} &amp; \cdots
-&amp; \text{REACH}_{n} &amp; \text{REACH}\\ \hline
-\text{ANS} &amp; \text{SELF}_{1} &amp; \text{SELF}_{2} &amp; \cdots
-&amp; \text{SELF}_{n} &amp; \text{SELF}\\ \hline
-V_{\text{OUT}} &amp; NULL&amp; NULL&amp; NULL&amp; NULL&amp;
-\text{OUT}\\ \hline
-0 &amp; 1 &amp; 1 &amp; \cdots &amp;1 &amp; \text{NOT}\\ \hline
-\end{array}</span>
-<p>Specifically, we see that <span
-class="math inline">\text{NBR}_{i}</span> is a <span
-class="math inline">n \times 1</span> vector detailing which of the
-vertices are neighboring vertex <span class="math inline">v_i</span>, so
-the <span class="math inline">j</span>th element of <span
-class="math inline">v_i</span> is <span class="math inline">1</span> if
-<span class="math inline">v_i</span> and <span
-class="math inline">v_j</span> are neighboring vertices, and <span
-class="math inline">0</span> otherwise. Additionally, <span
-class="math inline">\text{SELF}_{i}</span> is just the <span
-class="math inline">n \times 1</span> vector with the <span
-class="math inline">i</span>th element <span
-class="math inline">1</span> and all other elements <span
-class="math inline">0</span> (e.g. the one-hot encoding of the vector).
-Now, at every step, the <span class="math inline">\text{REACH}_k</span>
-vector for all <span class="math inline">k</span> is updated based on
-the previous <span class="math inline">\text{REACH}_k</span> vector and
-<span class="math inline">\text{NBR}_{k}</span> (since all entries that
-are <span class="math inline">1</span> in <span
-class="math inline">\text{REACH}_k\text{NBR}_{k}^T</span> must be
-updated in the manner such that if the <span class="math inline">(i,
-j)</span>th element of <span
-class="math inline">\text{REACH}_k\text{NBR}_{k}^T</span> is <span
-class="math inline">1</span>, then <span
-class="math inline">\text{REACH}_i</span>’s <span
-class="math inline">j</span>th column is set to <span
-class="math inline">1</span>. This is equivalent to adding <span
-class="math inline">\text{REACH}_k</span> to each integer <span
-class="math inline">i</span> where <span
-class="math inline">\text{NBR}_{k}</span>’s <span
-class="math inline">i</span>th entry is nonzero.</p>
-<p>This iterates through all the vertices, and at the end, we may see
-what run we are on to update <span
-class="math inline">V_{\text{OUT}}</span>.</p>
-<h2 id="perturbing-the-weights">Perturbing the Weights</h2>
-<p>SLT folks like to think about geometry of loss landscape CITE So we
-did an experiment where we mess with the weights.</p>
-<p>Findings: XXX</p>
-<h2 id="our-model">Our Model</h2>
 <h3 id="architecture">Architecture</h3>
 <p>We used a standard transformer architecture. To ensure that it can in
 theory learn BFS, we ensured that the number of layers in our
@@ -625,92 +729,315 @@ embedding of <span class="math inline">b</span>. For the target vertex
 class="math inline">(t,TARGET)</span> in the same way.</p>
 <!-- https://cocreate.csail.mit.edu/r/sxArTEXiAgJshznmm -->
 <p><img src="img/embeddings.svg" /></p>
-<h3 id="training">Training</h3>
-<p>To match the BFS transformer as closely as possible, we used a model
-dimension of <span class="math inline">64</span>, <span
-class="math inline">11</span> layers, and <span
-class="math inline">2</span> heads per layer, for a total of 550433
-parameters. In 32-bit float precision, that corresponds to around <span
-class="math inline">1.76\cdot10^6</span> bits. The number of possible
-graphs on 15 vertices generated using our procedure is approximately</p>
-<p><span class="math display">\frac{\binom{15}{2}^{15}}{15!} =
-1.59\cdot10^{18}.</span></p>
-<p>This is because there are <span
+<h3 id="specifications">Specifications</h3>
+<p>For our training run, we used the following specifications:</p>
+<table>
+<thead>
+<tr class="header">
+<th>Hyperparameter</th>
+<th>Value</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Model dimension</td>
+<td><span class="math inline">64</span></td>
+</tr>
+<tr class="even">
+<td>Layers</td>
+<td><span class="math inline">11</span></td>
+</tr>
+<tr class="odd">
+<td>Heads</td>
+<td><span class="math inline">2</span></td>
+</tr>
+<tr class="even">
+<td>Epochs</td>
+<td><span class="math inline">100</span></td>
+</tr>
+<tr class="odd">
+<td>Batch size</td>
+<td><span class="math inline">2^{15}</span></td>
+</tr>
+<tr class="even">
+<td>Dropout</td>
+<td><span class="math inline">0</span></td>
+</tr>
+<tr class="odd">
+<td>Training examples</td>
+<td><span class="math inline">10^8</span></td>
+</tr>
+<tr class="even">
+<td>Parameters</td>
+<td><span class="math inline">550433</span></td>
+</tr>
+<tr class="odd">
+<td>Learning rate</td>
+<td><span class="math inline">5\cdot10^{-4}</span></td>
+</tr>
+<tr class="even">
+<td>Loss</td>
+<td>MSE</td>
+</tr>
+<tr class="odd">
+<td>Optimizer</td>
+<td>Adam</td>
+</tr>
+</tbody>
+</table>
+<p>The number of bits required to store the model parameters in float32
+is around <span class="math inline">1.76\cdot10^7</span>. The number of
+possible graphs on 15 vertices generated using our procedure is
+approximately <span class="math display">\frac{\binom{15}{2}^{15}}{15!}
+\approx 1.59\cdot10^{18}.</span> This is because there are <span
 class="math inline">\binom{15}{2}</span> choices for each of the 15
 edges and we don’t care about the order of the edges. This is only an
 approximation because some edges might be duplicated. Each graph has an
 answer between 1 and 15 which requires around 4 bits, so memorizing all
-the answers requires <span class="math inline">4\cdot1.59\cdot10^{18} =
-6.36\cdot10^{18}</span> bits, which is <span
-class="math inline">3.61\cdot10^{12}</span> times larger than our model
-size.</p>
-<p>To train the model, we used MSE loss, the Adam optimizer, a learning
-rate of <span class="math inline">3\cdot10^{-4}</span>, and a batch size
-of <span class="math inline">2^{15}</span> for one billion randomly
-generated graphs. A training run takes roughly eight hours to run on a
-Radeon 7900 XTX graphics card. Our final MSE loss was <span
-class="math inline">0.000555</span>.</p>
-<p>TODO: use https://mpld3.github.io/index.html to make interactive
-plots</p>
-<p><img src="training-loss.png" /></p>
-<p><img src="training-2d-histogram.png" /></p>
-<p>One pattern we consistently noticed during training is that the model
-often gets stuck and plateaus for many epochs before rapidly decreasing.
-For instance, this happened between epochs 100 and 300 in the graph
-above:</p>
-<p><img src="grokking.png" /></p>
-<p>“grokking” hypothesis: it’s memorizing all length 2 paths?</p>
-<p>TODO: cite Neel Nanda grokking modular addition</p>
-<p>TODO: CRAZY!!! training curves for 1, 2, 3 length paths</p>
+the answers requires <span
+class="math inline">4\cdot1.59\cdot10^{18} = 6.36\cdot10^{18}</span>
+bits, which is <span class="math inline">3.61\cdot10^{11}</span> times
+larger than our model size. This implies that in order to get really low
+loss, our model needs to do something other than brute memorization.</p>
+<p>A single training run takes roughly eight hours to run on a Radeon
+7900 XTX graphics card.</p>
 <p>One pitfall we encountered during training is that we initially used
 bfloat16 to save VRAM, but our huge batch size caused loss-of-precision
 problems and made training very difficult. It took us two weeks to debug
 this until we found that switching to float32 improved training
 significantly.</p>
-<h2 id="fine-tuning-results">Fine tuning results</h2>
+<h3 id="training-results">Training Results</h3>
+<p>Here is our training loss curve:</p>
+<p>$include<code>raw="html"</code> plots/train-loss.html</p>
+<p>Here is a “confusion matrix”-like plot, showing for each true label,
+the probability distribution over classifications that the model
+gives.</p>
+<p>$include<code>raw="html"</code> plots/train-hist.html</p>
+<p>One pattern we consistently noticed during training is that the model
+often gets stuck and plateaus for many epochs before rapidly decreasing.
+For instance, this happened between epochs 100 and 300 in the graph
+above:</p>
+<p>$include<code>raw="html"</code> plots/grokking.html</p>
+<p>Such <strong>phase change</strong> phenomena are ubiquitous in deep
+learning, and for simple algorithmic tasks we can sometimes interpret
+them<a href="#fn2" class="footnote-ref" id="fnref2"
+role="doc-noteref"><sup>2</sup></a>.</p>
+<p><strong>TODO ANT: training curves for 1, 2, 3 length paths. and add
+some text about why this is an interesting finding.</strong></p>
+<p>$include<code>raw="html"</code> plots/len-loss.html</p>
+<h2 id="generalization">Generalization</h2>
+<h3 id="fine-tuning-results">Fine Tuning Results</h3>
 <p>After receiving our initial results, we fine-tuned with a learning
 rate of 1e-5, also with MSE and the same batch size. Our final results
 are shown in the images below.</p>
-<p><img src="fine-tuning-loss.png" /></p>
-<p><img src="fine-tuning-2d-histogram.png" /></p>
-<p><img src="test-2d-histogram.png" /></p>
-<p>TODO: get new graphs</p>
-<p>It’s pretty good!!!</p>
-<p>Can only generalize to target vertices from 2 to 7 since 8 through 15
-didn’t appear in the fine-tune data</p>
-<p>but this still means it</p>
+<p>$include<code>raw="html"</code> plots/tune-loss.html</p>
+<p>$include<code>raw="html"</code> plots/tune-hist.html</p>
+<p>$include<code>raw="html"</code>
+plots/test-onpath-largetarget.html</p>
+<p>$include<code>raw="html"</code>
+plots/test-onpath-smalltarget.html</p>
+<p>$include<code>raw="html"</code> plots/test-small-any.html</p>
+<p>$include<code>raw="html"</code> plots/test-large-any.html</p>
 <h2 id="conclusion">Conclusion</h2>
-<p>however, a machine learning approach may do better in time through
-parallelism, although at the expense of using much more memory.
-<strong>TODO: ALEK: this is BS. If you want a parallel algorithm for
-BFS, here’s one
-https://en.wikipedia.org/wiki/Parallel_single-source_shortest_path_algorithm</strong></p>
-<p>just do bfs lol</p>
-<p><strong>Future Work</strong> There are a couple of other things that
-we could try to learn shortest paths better and maybe see more
-generalization. - Chain of thought - Train model to output a path, not
-just the distance. Give it partial points for outputting anything that
-is a legitimate path (i.e., consists of real edges) and more points for
-getting the distance correct.</p>
-<h2 class="unnumbered" id="references">References</h2>
-<div id="refs" class="references csl-bib-body hanging-indent"
-data-entry-spacing="0" role="list">
-<div id="ref-DBLP:journals/corr/abs-2102-09544" class="csl-entry"
-role="listitem">
-Cappart, Quentin, Didier Chételat, Elias B. Khalil, Andrea Lodi,
-Christopher Morris, and Petar Velickovic. 2021. <span>“Combinatorial
-Optimization and Reasoning with Graph Neural Networks.”</span>
-<em>CoRR</em> abs/2102.09544. <a
-href="https://arxiv.org/abs/2102.09544">https://arxiv.org/abs/2102.09544</a>.
-</div>
-<div id="ref-10.1109/TPAMI.2023.3256421" class="csl-entry"
-role="listitem">
-Tutsoy, Onder. 2023. <span>“Graph Theory Based Large-Scale Machine
-Learning with Multi-Dimensional Constrained Optimization Approaches for
-Exact Epidemiological Modeling of Pandemic Diseases.”</span> <em>IEEE
-Trans. Pattern Anal. Mach. Intell.</em> 45 (8): 9836–45. <a
-href="https://doi.org/10.1109/TPAMI.2023.3256421">https://doi.org/10.1109/TPAMI.2023.3256421</a>.
-</div>
-</div>
+<p>In this post we’ve investigated solving the shortest paths problem
+with a transformer. We’ve shown that this problem has a simple solution
+that can be implemented by a transformer. We’ve shown that a transformer
+can learn to solve this problem. We’ve shown that when the transformer
+learns to solve this problem it also implicitly builds good internal
+representations of the input data that allow for efficient fine tuning
+to find shortest paths between other vertices. Finally, we’ve seen that
+some off-distribution generalization does occur, and some does not! The
+main conceptual take-away from our work is that it’s hard to predict
+when models will and won’t generalize.</p>
+<h2 id="appendix">Appendix</h2>
+<div class="sourceCode" id="cb2"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> torch</span>
+<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> torch.nn <span class="im">as</span> nn</span>
+<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> random</span>
+<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> collections <span class="im">import</span> deque</span>
+<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Configuration</span></span>
+<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a>NVTXS <span class="op">=</span> <span class="dv">8</span></span>
+<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a>MAXDIST <span class="op">=</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
+<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a>AVGDEG <span class="op">=</span> <span class="dv">2</span></span>
+<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a>SEQLEN <span class="op">=</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
+<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a>HIDDENDIM <span class="op">=</span> <span class="dv">4</span> <span class="op">*</span> NVTXS <span class="op">+</span> <span class="dv">2</span></span>
+<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a>START_REACH <span class="op">=</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
+<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a>START_OUT <span class="op">=</span> <span class="dv">2</span> <span class="op">*</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
+<span id="cb2-14"><a href="#cb2-14" aria-hidden="true" tabindex="-1"></a>START_SELF <span class="op">=</span> <span class="dv">3</span> <span class="op">*</span> NVTXS <span class="op">+</span> <span class="dv">1</span></span>
+<span id="cb2-15"><a href="#cb2-15" aria-hidden="true" tabindex="-1"></a>SRC_FLAG_IDX <span class="op">=</span> START_SELF</span>
+<span id="cb2-16"><a href="#cb2-16" aria-hidden="true" tabindex="-1"></a>ANS_FLAG_IDX <span class="op">=</span> <span class="dv">0</span></span>
+<span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a>NOTANS_FLAG_IDX <span class="op">=</span> <span class="op">-</span><span class="dv">1</span></span>
+<span id="cb2-18"><a href="#cb2-18" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-19"><a href="#cb2-19" aria-hidden="true" tabindex="-1"></a><span class="co"># Determine device</span></span>
+<span id="cb2-20"><a href="#cb2-20" aria-hidden="true" tabindex="-1"></a>device <span class="op">=</span> torch.device(<span class="st">&quot;cuda&quot;</span> <span class="cf">if</span> torch.cuda.is_available() <span class="cf">else</span> <span class="st">&quot;cpu&quot;</span>)</span>
+<span id="cb2-21"><a href="#cb2-21" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-22"><a href="#cb2-22" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> random_graph(device):</span>
+<span id="cb2-23"><a href="#cb2-23" aria-hidden="true" tabindex="-1"></a>    <span class="co">&quot;&quot;&quot;Generate a random graph tensor.&quot;&quot;&quot;</span></span>
+<span id="cb2-24"><a href="#cb2-24" aria-hidden="true" tabindex="-1"></a>    data <span class="op">=</span> torch.zeros((SEQLEN, HIDDENDIM), device<span class="op">=</span>device)</span>
+<span id="cb2-25"><a href="#cb2-25" aria-hidden="true" tabindex="-1"></a>    </span>
+<span id="cb2-26"><a href="#cb2-26" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Mark self vertices</span></span>
+<span id="cb2-27"><a href="#cb2-27" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">1</span>, NVTXS <span class="op">+</span> <span class="dv">1</span>):</span>
+<span id="cb2-28"><a href="#cb2-28" aria-hidden="true" tabindex="-1"></a>        data[i, START_SELF <span class="op">-</span> <span class="dv">1</span> <span class="op">+</span> i] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-29"><a href="#cb2-29" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-30"><a href="#cb2-30" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Create adjacency list</span></span>
+<span id="cb2-31"><a href="#cb2-31" aria-hidden="true" tabindex="-1"></a>    adj_list <span class="op">=</span> [<span class="bu">set</span>() <span class="cf">for</span> _ <span class="kw">in</span> <span class="bu">range</span>(SEQLEN)]</span>
+<span id="cb2-32"><a href="#cb2-32" aria-hidden="true" tabindex="-1"></a>    indices <span class="op">=</span> [random.randint(<span class="dv">1</span>, NVTXS) <span class="cf">for</span> _ <span class="kw">in</span> <span class="bu">range</span>(AVGDEG <span class="op">*</span> NVTXS)]</span>
+<span id="cb2-33"><a href="#cb2-33" aria-hidden="true" tabindex="-1"></a>    </span>
+<span id="cb2-34"><a href="#cb2-34" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">0</span>, <span class="bu">len</span>(indices), <span class="dv">2</span>):</span>
+<span id="cb2-35"><a href="#cb2-35" aria-hidden="true" tabindex="-1"></a>        u <span class="op">=</span> indices[i]</span>
+<span id="cb2-36"><a href="#cb2-36" aria-hidden="true" tabindex="-1"></a>        v <span class="op">=</span> indices[i <span class="op">+</span> <span class="dv">1</span>]</span>
+<span id="cb2-37"><a href="#cb2-37" aria-hidden="true" tabindex="-1"></a>        <span class="cf">if</span> u <span class="op">!=</span> v:</span>
+<span id="cb2-38"><a href="#cb2-38" aria-hidden="true" tabindex="-1"></a>            <span class="co"># Bidirectional connections</span></span>
+<span id="cb2-39"><a href="#cb2-39" aria-hidden="true" tabindex="-1"></a>            data[v, u] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-40"><a href="#cb2-40" aria-hidden="true" tabindex="-1"></a>            data[u, v] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-41"><a href="#cb2-41" aria-hidden="true" tabindex="-1"></a>            data[v, NVTXS <span class="op">+</span> u] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-42"><a href="#cb2-42" aria-hidden="true" tabindex="-1"></a>            data[u, NVTXS <span class="op">+</span> v] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-43"><a href="#cb2-43" aria-hidden="true" tabindex="-1"></a>            adj_list[u].add(v)</span>
+<span id="cb2-44"><a href="#cb2-44" aria-hidden="true" tabindex="-1"></a>            adj_list[v].add(u)</span>
+<span id="cb2-45"><a href="#cb2-45" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-46"><a href="#cb2-46" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Set flags</span></span>
+<span id="cb2-47"><a href="#cb2-47" aria-hidden="true" tabindex="-1"></a>    data[<span class="dv">0</span>, ANS_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-48"><a href="#cb2-48" aria-hidden="true" tabindex="-1"></a>    data[<span class="dv">1</span>:, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-49"><a href="#cb2-49" aria-hidden="true" tabindex="-1"></a>    data[<span class="dv">0</span>, START_REACH:START_REACH <span class="op">+</span> NVTXS] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-50"><a href="#cb2-50" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> data, adj_list</span>
+<span id="cb2-51"><a href="#cb2-51" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-52"><a href="#cb2-52" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> SSSP(G):</span>
+<span id="cb2-53"><a href="#cb2-53" aria-hidden="true" tabindex="-1"></a>    <span class="co">&quot;&quot;&quot;Single Source Shortest Path algorithm.&quot;&quot;&quot;</span></span>
+<span id="cb2-54"><a href="#cb2-54" aria-hidden="true" tabindex="-1"></a>    dist <span class="op">=</span> [MAXDIST <span class="cf">for</span> _ <span class="kw">in</span> G]</span>
+<span id="cb2-55"><a href="#cb2-55" aria-hidden="true" tabindex="-1"></a>    dist[<span class="dv">1</span>] <span class="op">=</span> <span class="dv">0</span></span>
+<span id="cb2-56"><a href="#cb2-56" aria-hidden="true" tabindex="-1"></a>    frontier <span class="op">=</span> deque([<span class="dv">1</span>])</span>
+<span id="cb2-57"><a href="#cb2-57" aria-hidden="true" tabindex="-1"></a>    <span class="cf">while</span> frontier:</span>
+<span id="cb2-58"><a href="#cb2-58" aria-hidden="true" tabindex="-1"></a>        vtx <span class="op">=</span> frontier.popleft()</span>
+<span id="cb2-59"><a href="#cb2-59" aria-hidden="true" tabindex="-1"></a>        <span class="cf">for</span> x <span class="kw">in</span> G[vtx]:</span>
+<span id="cb2-60"><a href="#cb2-60" aria-hidden="true" tabindex="-1"></a>            <span class="cf">if</span> dist[x] <span class="op">==</span> MAXDIST:</span>
+<span id="cb2-61"><a href="#cb2-61" aria-hidden="true" tabindex="-1"></a>                dist[x] <span class="op">=</span> <span class="dv">1</span> <span class="op">+</span> dist[vtx]</span>
+<span id="cb2-62"><a href="#cb2-62" aria-hidden="true" tabindex="-1"></a>                frontier.append(x)</span>
+<span id="cb2-63"><a href="#cb2-63" aria-hidden="true" tabindex="-1"></a>                <span class="cf">if</span> x <span class="op">==</span> <span class="dv">2</span>:</span>
+<span id="cb2-64"><a href="#cb2-64" aria-hidden="true" tabindex="-1"></a>                    <span class="cf">return</span> dist[<span class="dv">2</span>]</span>
+<span id="cb2-65"><a href="#cb2-65" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> MAXDIST</span>
+<span id="cb2-66"><a href="#cb2-66" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-67"><a href="#cb2-67" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> mkbatch(size):</span>
+<span id="cb2-68"><a href="#cb2-68" aria-hidden="true" tabindex="-1"></a>    <span class="co">&quot;&quot;&quot;Create a batch of graph data.&quot;&quot;&quot;</span></span>
+<span id="cb2-69"><a href="#cb2-69" aria-hidden="true" tabindex="-1"></a>    graphs <span class="op">=</span> []</span>
+<span id="cb2-70"><a href="#cb2-70" aria-hidden="true" tabindex="-1"></a>    distances <span class="op">=</span> []</span>
+<span id="cb2-71"><a href="#cb2-71" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-72"><a href="#cb2-72" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> _ <span class="kw">in</span> <span class="bu">range</span>(size):</span>
+<span id="cb2-73"><a href="#cb2-73" aria-hidden="true" tabindex="-1"></a>        data, adj_list <span class="op">=</span> random_graph(device)</span>
+<span id="cb2-74"><a href="#cb2-74" aria-hidden="true" tabindex="-1"></a>        dist <span class="op">=</span> SSSP(adj_list)</span>
+<span id="cb2-75"><a href="#cb2-75" aria-hidden="true" tabindex="-1"></a>        graphs.append(data)</span>
+<span id="cb2-76"><a href="#cb2-76" aria-hidden="true" tabindex="-1"></a>        distances.append(dist)</span>
+<span id="cb2-77"><a href="#cb2-77" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-78"><a href="#cb2-78" aria-hidden="true" tabindex="-1"></a>    data <span class="op">=</span> torch.stack(graphs)</span>
+<span id="cb2-79"><a href="#cb2-79" aria-hidden="true" tabindex="-1"></a>    labels <span class="op">=</span> torch.tensor(distances, dtype<span class="op">=</span>torch.float32, device<span class="op">=</span>device)</span>
+<span id="cb2-80"><a href="#cb2-80" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> data, labels</span>
+<span id="cb2-81"><a href="#cb2-81" aria-hidden="true" tabindex="-1"></a>    </span>
+<span id="cb2-82"><a href="#cb2-82" aria-hidden="true" tabindex="-1"></a>BIG,SUPABIG,MED,CURSE <span class="op">=</span> <span class="dv">12</span>,<span class="dv">30</span>,<span class="dv">7</span>,<span class="dv">5</span></span>
+<span id="cb2-83"><a href="#cb2-83" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-84"><a href="#cb2-84" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> SillyTransformer(nn.Module):</span>
+<span id="cb2-85"><a href="#cb2-85" aria-hidden="true" tabindex="-1"></a>    <span class="kw">def</span> <span class="fu">__init__</span>(<span class="va">self</span>, device):</span>
+<span id="cb2-86"><a href="#cb2-86" aria-hidden="true" tabindex="-1"></a>        <span class="bu">super</span>().<span class="fu">__init__</span>()</span>
+<span id="cb2-87"><a href="#cb2-87" aria-hidden="true" tabindex="-1"></a>        <span class="va">self</span>.device <span class="op">=</span> device</span>
+<span id="cb2-88"><a href="#cb2-88" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-89"><a href="#cb2-89" aria-hidden="true" tabindex="-1"></a>        <span class="cf">with</span> torch.no_grad():</span>
+<span id="cb2-90"><a href="#cb2-90" aria-hidden="true" tabindex="-1"></a>            <span class="co"># Initialize weight parameters with specific configurations</span></span>
+<span id="cb2-91"><a href="#cb2-91" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.mostKs <span class="op">=</span> nn.ParameterList()</span>
+<span id="cb2-92"><a href="#cb2-92" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.mostQs <span class="op">=</span> nn.ParameterList()</span>
+<span id="cb2-93"><a href="#cb2-93" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.mostVs <span class="op">=</span> nn.ParameterList()</span>
+<span id="cb2-94"><a href="#cb2-94" aria-hidden="true" tabindex="-1"></a>            <span class="cf">for</span> head <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">1</span>, NVTXS <span class="op">+</span> <span class="dv">1</span>):</span>
+<span id="cb2-95"><a href="#cb2-95" aria-hidden="true" tabindex="-1"></a>                Q <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">2</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
+<span id="cb2-96"><a href="#cb2-96" aria-hidden="true" tabindex="-1"></a>                Q[<span class="dv">0</span>, START_REACH <span class="op">-</span> <span class="dv">1</span> <span class="op">+</span> head] <span class="op">=</span> SUPABIG</span>
+<span id="cb2-97"><a href="#cb2-97" aria-hidden="true" tabindex="-1"></a>                Q[<span class="dv">1</span>, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-98"><a href="#cb2-98" aria-hidden="true" tabindex="-1"></a>                K <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">2</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
+<span id="cb2-99"><a href="#cb2-99" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">0</span>, head] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-100"><a href="#cb2-100" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, ANS_FLAG_IDX] <span class="op">=</span> BIG</span>
+<span id="cb2-101"><a href="#cb2-101" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-102"><a href="#cb2-102" aria-hidden="true" tabindex="-1"></a>                V <span class="op">=</span> nn.Parameter(torch.zeros((NVTXS, HIDDENDIM), device<span class="op">=</span>device))</span>
+<span id="cb2-103"><a href="#cb2-103" aria-hidden="true" tabindex="-1"></a>                <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(NVTXS):</span>
+<span id="cb2-104"><a href="#cb2-104" aria-hidden="true" tabindex="-1"></a>                    V[i, START_SELF <span class="op">+</span> i] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-105"><a href="#cb2-105" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-106"><a href="#cb2-106" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.mostKs.append(K)</span>
+<span id="cb2-107"><a href="#cb2-107" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.mostQs.append(Q)</span>
+<span id="cb2-108"><a href="#cb2-108" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.mostVs.append(V)</span>
+<span id="cb2-109"><a href="#cb2-109" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.weirdKs <span class="op">=</span> nn.ParameterList()</span>
+<span id="cb2-110"><a href="#cb2-110" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.weirdQs <span class="op">=</span> nn.ParameterList()</span>
+<span id="cb2-111"><a href="#cb2-111" aria-hidden="true" tabindex="-1"></a>            <span class="va">self</span>.weirdVs <span class="op">=</span> nn.ParameterList()</span>
+<span id="cb2-112"><a href="#cb2-112" aria-hidden="true" tabindex="-1"></a>            <span class="cf">for</span> layer <span class="kw">in</span> <span class="bu">range</span>(NVTXS):</span>
+<span id="cb2-113"><a href="#cb2-113" aria-hidden="true" tabindex="-1"></a>                K <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">3</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
+<span id="cb2-114"><a href="#cb2-114" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">0</span>, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="op">-</span>BIG</span>
+<span id="cb2-115"><a href="#cb2-115" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">0</span>, SRC_FLAG_IDX] <span class="op">=</span> BIG<span class="op">+</span>SUPABIG</span>
+<span id="cb2-116"><a href="#cb2-116" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, NOTANS_FLAG_IDX] <span class="op">=</span> <span class="op">-</span>SUPABIG</span>
+<span id="cb2-117"><a href="#cb2-117" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, NVTXS <span class="op">+</span> <span class="dv">2</span>] <span class="op">=</span> BIG<span class="op">+</span>SUPABIG</span>
+<span id="cb2-118"><a href="#cb2-118" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">1</span>, ANS_FLAG_IDX] <span class="op">=</span> <span class="op">-</span>BIG<span class="op">-</span>SUPABIG</span>
+<span id="cb2-119"><a href="#cb2-119" aria-hidden="true" tabindex="-1"></a>                K[<span class="dv">2</span>, ANS_FLAG_IDX] <span class="op">=</span> MED</span>
+<span id="cb2-120"><a href="#cb2-120" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-121"><a href="#cb2-121" aria-hidden="true" tabindex="-1"></a>                Q <span class="op">=</span> nn.Parameter(torch.zeros((<span class="dv">3</span>, HIDDENDIM), device<span class="op">=</span>device))</span>
+<span id="cb2-122"><a href="#cb2-122" aria-hidden="true" tabindex="-1"></a>                Q[:, ANS_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-123"><a href="#cb2-123" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-124"><a href="#cb2-124" aria-hidden="true" tabindex="-1"></a>                V <span class="op">=</span> nn.Parameter(torch.zeros((NVTXS, HIDDENDIM), device<span class="op">=</span>device))</span>
+<span id="cb2-125"><a href="#cb2-125" aria-hidden="true" tabindex="-1"></a>                V[layer, SRC_FLAG_IDX] <span class="op">=</span> <span class="dv">1</span></span>
+<span id="cb2-126"><a href="#cb2-126" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-127"><a href="#cb2-127" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.weirdKs.append(K)</span>
+<span id="cb2-128"><a href="#cb2-128" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.weirdQs.append(Q)</span>
+<span id="cb2-129"><a href="#cb2-129" aria-hidden="true" tabindex="-1"></a>                <span class="va">self</span>.weirdVs.append(V)</span>
+<span id="cb2-130"><a href="#cb2-130" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-131"><a href="#cb2-131" aria-hidden="true" tabindex="-1"></a>    <span class="kw">def</span> forward(<span class="va">self</span>, src):</span>
+<span id="cb2-132"><a href="#cb2-132" aria-hidden="true" tabindex="-1"></a>        <span class="cf">for</span> layer <span class="kw">in</span> <span class="bu">range</span>(NVTXS):</span>
+<span id="cb2-133"><a href="#cb2-133" aria-hidden="true" tabindex="-1"></a>            allKs <span class="op">=</span> [<span class="va">self</span>.weirdKs[layer]] <span class="op">+</span> [x <span class="cf">for</span> x <span class="kw">in</span> <span class="va">self</span>.mostKs]</span>
+<span id="cb2-134"><a href="#cb2-134" aria-hidden="true" tabindex="-1"></a>            allQs <span class="op">=</span> [<span class="va">self</span>.weirdQs[layer]] <span class="op">+</span> [x <span class="cf">for</span> x <span class="kw">in</span> <span class="va">self</span>.mostQs]</span>
+<span id="cb2-135"><a href="#cb2-135" aria-hidden="true" tabindex="-1"></a>            allVs <span class="op">=</span> [<span class="va">self</span>.weirdVs[layer]] <span class="op">+</span> [x <span class="cf">for</span> x <span class="kw">in</span> <span class="va">self</span>.mostVs]</span>
+<span id="cb2-136"><a href="#cb2-136" aria-hidden="true" tabindex="-1"></a>            head_outputs <span class="op">=</span> []</span>
+<span id="cb2-137"><a href="#cb2-137" aria-hidden="true" tabindex="-1"></a>            </span>
+<span id="cb2-138"><a href="#cb2-138" aria-hidden="true" tabindex="-1"></a>            <span class="cf">for</span> (K, Q, V) <span class="kw">in</span> <span class="bu">zip</span>(allKs, allQs, allVs):</span>
+<span id="cb2-139"><a href="#cb2-139" aria-hidden="true" tabindex="-1"></a>                ksrc <span class="op">=</span> torch.matmul(src, K.unsqueeze(<span class="dv">0</span>).transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
+<span id="cb2-140"><a href="#cb2-140" aria-hidden="true" tabindex="-1"></a>                qsrc <span class="op">=</span> torch.matmul(src, Q.unsqueeze(<span class="dv">0</span>).transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
+<span id="cb2-141"><a href="#cb2-141" aria-hidden="true" tabindex="-1"></a>                vsrc <span class="op">=</span> torch.matmul(src, V.unsqueeze(<span class="dv">0</span>).transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
+<span id="cb2-142"><a href="#cb2-142" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-143"><a href="#cb2-143" aria-hidden="true" tabindex="-1"></a>                scores <span class="op">=</span> torch.matmul(qsrc, ksrc.transpose(<span class="op">-</span><span class="dv">2</span>, <span class="op">-</span><span class="dv">1</span>))</span>
+<span id="cb2-144"><a href="#cb2-144" aria-hidden="true" tabindex="-1"></a>                attention_weights <span class="op">=</span> torch.softmax(scores, dim<span class="op">=-</span><span class="dv">1</span>)</span>
+<span id="cb2-145"><a href="#cb2-145" aria-hidden="true" tabindex="-1"></a>                head_output <span class="op">=</span> torch.matmul(attention_weights, vsrc)</span>
+<span id="cb2-146"><a href="#cb2-146" aria-hidden="true" tabindex="-1"></a>                head_outputs.append(head_output)</span>
+<span id="cb2-147"><a href="#cb2-147" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-148"><a href="#cb2-148" aria-hidden="true" tabindex="-1"></a>            new_reaches <span class="op">=</span> <span class="bu">sum</span>(head_outputs[<span class="dv">1</span>:])</span>
+<span id="cb2-149"><a href="#cb2-149" aria-hidden="true" tabindex="-1"></a>            BSZ <span class="op">=</span> new_reaches.shape[<span class="dv">0</span>]</span>
+<span id="cb2-150"><a href="#cb2-150" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-151"><a href="#cb2-151" aria-hidden="true" tabindex="-1"></a>            nodelta_nbrs <span class="op">=</span> torch.zeros((BSZ, SEQLEN, NVTXS <span class="op">+</span> <span class="dv">1</span>), device<span class="op">=</span><span class="va">self</span>.device)</span>
+<span id="cb2-152"><a href="#cb2-152" aria-hidden="true" tabindex="-1"></a>            morepadlol <span class="op">=</span> torch.zeros((BSZ, SEQLEN, <span class="dv">1</span> <span class="op">+</span> NVTXS), device<span class="op">=</span><span class="va">self</span>.device)</span>
+<span id="cb2-153"><a href="#cb2-153" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-154"><a href="#cb2-154" aria-hidden="true" tabindex="-1"></a>            src <span class="op">=</span> src <span class="op">+</span> torch.cat((nodelta_nbrs, new_reaches, head_outputs[<span class="dv">0</span>], morepadlol), dim<span class="op">=</span><span class="dv">2</span>)</span>
+<span id="cb2-155"><a href="#cb2-155" aria-hidden="true" tabindex="-1"></a>            src[:, :, START_REACH:START_REACH <span class="op">+</span> NVTXS] <span class="op">=</span> <span class="dv">2</span> <span class="op">*</span> torch.sigmoid(src[:, :, START_REACH:START_REACH <span class="op">+</span> NVTXS] <span class="op">*</span> CURSE) <span class="op">-</span> <span class="dv">1</span></span>
+<span id="cb2-156"><a href="#cb2-156" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-157"><a href="#cb2-157" aria-hidden="true" tabindex="-1"></a>        canreach <span class="op">=</span> src[:, <span class="dv">0</span>, START_OUT:START_OUT <span class="op">+</span> NVTXS]</span>
+<span id="cb2-158"><a href="#cb2-158" aria-hidden="true" tabindex="-1"></a>        final_output <span class="op">=</span> <span class="dv">1</span> <span class="op">+</span> torch.<span class="bu">sum</span>(<span class="dv">1</span> <span class="op">-</span> canreach, dim<span class="op">=</span><span class="dv">1</span>)</span>
+<span id="cb2-159"><a href="#cb2-159" aria-hidden="true" tabindex="-1"></a>        <span class="cf">return</span> final_output</span>
+<span id="cb2-160"><a href="#cb2-160" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-161"><a href="#cb2-161" aria-hidden="true" tabindex="-1"></a>model <span class="op">=</span> SillyTransformer(device).to(device)</span>
+<span id="cb2-162"><a href="#cb2-162" aria-hidden="true" tabindex="-1"></a>params <span class="op">=</span> <span class="bu">sum</span>(p.numel() <span class="cf">for</span> p <span class="kw">in</span> model.parameters())</span>
+<span id="cb2-163"><a href="#cb2-163" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f&quot;Total number of parameters: </span><span class="sc">{</span>params<span class="sc">}</span><span class="ss">&quot;</span>)</span>
+<span id="cb2-164"><a href="#cb2-164" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-165"><a href="#cb2-165" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> destroy_rand_weights(model):</span>
+<span id="cb2-166"><a href="#cb2-166" aria-hidden="true" tabindex="-1"></a>    weight_lists <span class="op">=</span> [model.mostKs, model.mostQs, model.mostVs, </span>
+<span id="cb2-167"><a href="#cb2-167" aria-hidden="true" tabindex="-1"></a>                    model.weirdKs, model.weirdQs, model.weirdVs]</span>
+<span id="cb2-168"><a href="#cb2-168" aria-hidden="true" tabindex="-1"></a>    random_list <span class="op">=</span> random.choice(weight_lists)</span>
+<span id="cb2-169"><a href="#cb2-169" aria-hidden="true" tabindex="-1"></a>    random_matrix <span class="op">=</span> random.choice(random_list)</span>
+<span id="cb2-170"><a href="#cb2-170" aria-hidden="true" tabindex="-1"></a>    random_matrix.data <span class="op">=</span> torch.randn_like(random_matrix)</span>
+<span id="cb2-171"><a href="#cb2-171" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb2-172"><a href="#cb2-172" aria-hidden="true" tabindex="-1"></a>optimizer <span class="op">=</span> torch.optim.Adam(model.parameters(), lr<span class="op">=</span><span class="fl">1e-6</span>)</span>
+<span id="cb2-173"><a href="#cb2-173" aria-hidden="true" tabindex="-1"></a>loss_fn <span class="op">=</span> nn.MSELoss()</span></code></pre></div>
+<h2 id="references">References</h2>
+<section id="footnotes" class="footnotes footnotes-end-of-document"
+role="doc-endnotes">
+<hr />
+<ol>
+<li id="fn1"><p>Lin, Stephanie, Jacob Hilton, and Owain Evans. “TruthfulQA: Measuring How
+Models Mimic Human Falsehoods.” arXiv preprint arXiv:2109.07958 (2021).
+https://arxiv.org/abs/2109.07958.<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+<li id="fn2"><p>Nanda, Neel, Lawrence Chan, Tom Lieberum, Jess Smith,
+and Jacob Steinhardt. “Progress measures for grokking via mechanistic
+interpretability.” arXiv preprint arXiv:2301.05217 (2023).
+https://arxiv.org/abs/2301.05217.<a href="#fnref2" class="footnote-back"
+role="doc-backlink">↩︎</a></p></li>
+</ol>
+</section>
 </body>
 </html>
diff --git a/plots/train-dist.html b/plots/train-dist.html
new file mode 100644
index 0000000..54aee21
--- /dev/null
+++ b/plots/train-dist.html
@@ -0,0 +1,43 @@
+
+
+<style>
+
+</style>
+
+<div id="fig_el1200021399424053838726786165297"></div>
+<script>
+function mpld3_load_lib(url, callback){
+  var s = document.createElement('script');
+  s.src = url;
+  s.async = true;
+  s.onreadystatechange = s.onload = callback;
+  s.onerror = function(){console.warn("failed to load library " + url);};
+  document.getElementsByTagName("head")[0].appendChild(s);
+}
+
+if(typeof(mpld3) !== "undefined" && mpld3._mpld3IsLoaded){
+   // already loaded: just create the figure
+   !function(mpld3){
+       
+       mpld3.draw_figure("fig_el1200021399424053838726786165297", {"width": 640.0, "height": 480.0, "axes": [{"bbox": [0.125, 0.10999999999999999, 0.775, 0.77], "xlim": [0.29999999999999993, 15.7], "ylim": [0.0, 7533.75], "xdomain": [0.29999999999999993, 15.7], "ydomain": [0.0, 7533.75], "xscale": "linear", "yscale": "linear", "axes": [{"position": "bottom", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}, {"position": "left", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}], "axesbg": "#FFFFFF", "axesbgalpha": null, "zoomable": true, "id": "el120002139942361034016", "lines": [], "paths": [{"data": "data01", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126032", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data02", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361031376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data03", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942360791184", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data04", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361121472", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data05", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361120896", "dasharray": "none", 
"alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data06", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123824", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data07", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123392", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data08", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127904", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data09", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129296", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data10", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129056", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data11", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361128624", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 2, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126752", "dasharray": 
"none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 3, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126320", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data13", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129872", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}], "markers": [], "texts": [], "collections": [], "images": [], "sharex": [], "sharey": []}], "data": {"data01": [[1.0, 0.0], [1.933333396911621, 0.0], [1.933333396911621, 7175.0], [1.0, 7175.0]], "data02": [[1.933333396911621, 0.0], [2.866666793823242, 0.0], [2.866666793823242, 6576.0], [1.933333396911621, 6576.0]], "data03": [[2.866666793823242, 0.0], [3.799999952316284, 0.0], [3.799999952316284, 4624.0], [2.866666793823242, 4624.0]], "data04": [[3.799999952316284, 0.0], [4.733333587646484, 0.0], [4.733333587646484, 3021.0], [3.799999952316284, 3021.0]], "data05": [[4.733333587646484, 0.0], [5.666666507720947, 0.0], [5.666666507720947, 2275.0], [4.733333587646484, 2275.0]], "data06": [[5.666666507720947, 0.0], [6.599999904632568, 0.0], [6.599999904632568, 1653.0], [5.666666507720947, 1653.0]], "data07": [[6.599999904632568, 0.0], [7.5333333015441895, 0.0], [7.5333333015441895, 492.0], [6.599999904632568, 492.0]], "data08": [[7.5333333015441895, 0.0], [8.466667175292969, 0.0], [8.466667175292969, 119.0], [7.5333333015441895, 119.0]], "data09": [[8.466667175292969, 0.0], [9.399999618530273, 0.0], [9.399999618530273, 28.0], [8.466667175292969, 28.0]], "data10": [[9.399999618530273, 0.0], [10.333333015441895, 0.0], [10.333333015441895, 5.0], [9.399999618530273, 5.0]], "data11": [[10.333333015441895, 0.0], [11.266666412353516, 0.0], [11.266666412353516, 1.0], [10.333333015441895, 
1.0]], "data12": [[11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758]], "data13": [[14.066666603088379, 0.0], [15.0, 0.0], [15.0, 6799.0], [14.066666603088379, 6799.0]]}, "id": "el120002139942405383872", "plugins": [{"type": "reset"}, {"type": "zoom", "button": true, "enabled": false}, {"type": "boxzoom", "button": true, "enabled": false}]});
+   }(mpld3);
+}else if(typeof define === "function" && define.amd){
+   // require.js is available: use it to load d3/mpld3
+   require.config({paths: {d3: "https://d3js.org/d3.v5"}});
+   require(["d3"], function(d3){
+      window.d3 = d3;
+      mpld3_load_lib("https://mpld3.github.io/js/mpld3.v0.5.10.js", function(){
+         
+         mpld3.draw_figure("fig_el1200021399424053838726786165297", {"width": 640.0, "height": 480.0, "axes": [{"bbox": [0.125, 0.10999999999999999, 0.775, 0.77], "xlim": [0.29999999999999993, 15.7], "ylim": [0.0, 7533.75], "xdomain": [0.29999999999999993, 15.7], "ydomain": [0.0, 7533.75], "xscale": "linear", "yscale": "linear", "axes": [{"position": "bottom", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}, {"position": "left", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}], "axesbg": "#FFFFFF", "axesbgalpha": null, "zoomable": true, "id": "el120002139942361034016", "lines": [], "paths": [{"data": "data01", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126032", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data02", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361031376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data03", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942360791184", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data04", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361121472", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data05", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361120896", "dasharray": "none", 
"alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data06", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123824", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data07", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123392", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data08", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127904", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data09", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129296", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data10", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129056", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data11", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361128624", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 2, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126752", "dasharray": 
"none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 3, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126320", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data13", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129872", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}], "markers": [], "texts": [], "collections": [], "images": [], "sharex": [], "sharey": []}], "data": {"data01": [[1.0, 0.0], [1.933333396911621, 0.0], [1.933333396911621, 7175.0], [1.0, 7175.0]], "data02": [[1.933333396911621, 0.0], [2.866666793823242, 0.0], [2.866666793823242, 6576.0], [1.933333396911621, 6576.0]], "data03": [[2.866666793823242, 0.0], [3.799999952316284, 0.0], [3.799999952316284, 4624.0], [2.866666793823242, 4624.0]], "data04": [[3.799999952316284, 0.0], [4.733333587646484, 0.0], [4.733333587646484, 3021.0], [3.799999952316284, 3021.0]], "data05": [[4.733333587646484, 0.0], [5.666666507720947, 0.0], [5.666666507720947, 2275.0], [4.733333587646484, 2275.0]], "data06": [[5.666666507720947, 0.0], [6.599999904632568, 0.0], [6.599999904632568, 1653.0], [5.666666507720947, 1653.0]], "data07": [[6.599999904632568, 0.0], [7.5333333015441895, 0.0], [7.5333333015441895, 492.0], [6.599999904632568, 492.0]], "data08": [[7.5333333015441895, 0.0], [8.466667175292969, 0.0], [8.466667175292969, 119.0], [7.5333333015441895, 119.0]], "data09": [[8.466667175292969, 0.0], [9.399999618530273, 0.0], [9.399999618530273, 28.0], [8.466667175292969, 28.0]], "data10": [[9.399999618530273, 0.0], [10.333333015441895, 0.0], [10.333333015441895, 5.0], [9.399999618530273, 5.0]], "data11": [[10.333333015441895, 0.0], [11.266666412353516, 0.0], [11.266666412353516, 1.0], [10.333333015441895, 
1.0]], "data12": [[11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758]], "data13": [[14.066666603088379, 0.0], [15.0, 0.0], [15.0, 6799.0], [14.066666603088379, 6799.0]]}, "id": "el120002139942405383872", "plugins": [{"type": "reset"}, {"type": "zoom", "button": true, "enabled": false}, {"type": "boxzoom", "button": true, "enabled": false}]});
+      });
+    });
+}else{
+    // require.js not available: dynamically load d3 & mpld3
+    mpld3_load_lib("https://d3js.org/d3.v5.js", function(){
+         mpld3_load_lib("https://mpld3.github.io/js/mpld3.v0.5.10.js", function(){
+                 
+                 mpld3.draw_figure("fig_el1200021399424053838726786165297", {"width": 640.0, "height": 480.0, "axes": [{"bbox": [0.125, 0.10999999999999999, 0.775, 0.77], "xlim": [0.29999999999999993, 15.7], "ylim": [0.0, 7533.75], "xdomain": [0.29999999999999993, 15.7], "ydomain": [0.0, 7533.75], "xscale": "linear", "yscale": "linear", "axes": [{"position": "bottom", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}, {"position": "left", "nticks": 9, "tickvalues": null, "tickformat_formatter": "", "tickformat": null, "scale": "linear", "fontsize": 10.0, "grid": {"gridOn": false}, "visible": true}], "axesbg": "#FFFFFF", "axesbgalpha": null, "zoomable": true, "id": "el120002139942361034016", "lines": [], "paths": [{"data": "data01", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126032", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data02", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361031376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data03", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942360791184", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data04", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361121472", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data05", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361120896", "dasharray": 
"none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data06", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123824", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data07", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361123392", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data08", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127904", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data09", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129296", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data10", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129056", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data11", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361128624", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361127376", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 2, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126752", 
"dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data12", "xindex": 3, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361126320", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}, {"data": "data13", "xindex": 0, "yindex": 1, "coordinates": "data", "pathcodes": ["M", "L", "L", "L", "Z"], "id": "el120002139942361129872", "dasharray": "none", "alpha": 1, "facecolor": "#1F77B4", "edgecolor": "none", "edgewidth": 1.0, "zorder": 1}], "markers": [], "texts": [], "collections": [], "images": [], "sharex": [], "sharey": []}], "data": {"data01": [[1.0, 0.0], [1.933333396911621, 0.0], [1.933333396911621, 7175.0], [1.0, 7175.0]], "data02": [[1.933333396911621, 0.0], [2.866666793823242, 0.0], [2.866666793823242, 6576.0], [1.933333396911621, 6576.0]], "data03": [[2.866666793823242, 0.0], [3.799999952316284, 0.0], [3.799999952316284, 4624.0], [2.866666793823242, 4624.0]], "data04": [[3.799999952316284, 0.0], [4.733333587646484, 0.0], [4.733333587646484, 3021.0], [3.799999952316284, 3021.0]], "data05": [[4.733333587646484, 0.0], [5.666666507720947, 0.0], [5.666666507720947, 2275.0], [4.733333587646484, 2275.0]], "data06": [[5.666666507720947, 0.0], [6.599999904632568, 0.0], [6.599999904632568, 1653.0], [5.666666507720947, 1653.0]], "data07": [[6.599999904632568, 0.0], [7.5333333015441895, 0.0], [7.5333333015441895, 492.0], [6.599999904632568, 492.0]], "data08": [[7.5333333015441895, 0.0], [8.466667175292969, 0.0], [8.466667175292969, 119.0], [7.5333333015441895, 119.0]], "data09": [[8.466667175292969, 0.0], [9.399999618530273, 0.0], [9.399999618530273, 28.0], [8.466667175292969, 28.0]], "data10": [[9.399999618530273, 0.0], [10.333333015441895, 0.0], [10.333333015441895, 5.0], [9.399999618530273, 5.0]], "data11": [[10.333333015441895, 0.0], [11.266666412353516, 0.0], [11.266666412353516, 1.0], 
[10.333333015441895, 1.0]], "data12": [[11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [12.199999809265137, 0.0, 13.133333206176758, 14.066666603088379], [11.266666412353516, 0.0, 12.199999809265137, 13.133333206176758]], "data13": [[14.066666603088379, 0.0], [15.0, 0.0], [15.0, 6799.0], [14.066666603088379, 6799.0]]}, "id": "el120002139942405383872", "plugins": [{"type": "reset"}, {"type": "zoom", "button": true, "enabled": false}, {"type": "boxzoom", "button": true, "enabled": false}]});
+            })
+         });
+}
+</script>
+\ No newline at end of file
diff --git a/transformer_shortest_paths.ipynb b/transformer_shortest_paths.ipynb
index 8a99b4b..8da9ce4 100644
--- a/transformer_shortest_paths.ipynb
+++ b/transformer_shortest_paths.ipynb
@@ -10,17 +10,6 @@
    ]
   },
   {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Question: \n",
-    "\n",
-    "- Do the attention heads learn to attend to the same positional encodings\n",
-    "- do interp -- what is it doing? can we figure out?\n",
-    "- update: I think we should do interp once it's bigger. "
-   ]
-  },
-  {
    "cell_type": "code",
    "execution_count": 1,
    "execution_state": "idle",
@@ -37,9 +26,9 @@
     "from tqdm import tqdm\n",
     "import torch\n",
     "import torch.nn as nn\n",
-    "import matplotlib as mpl\n",
-    "import matplotlib.pyplot as plt\n",
     "from torch.utils.data import DataLoader, TensorDataset\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt, mpld3\n",
     "\n",
     "from math import sqrt\n",
     "from collections import deque\n",
@@ -72,7 +61,7 @@
     "SEQ_LEN = MAX_VTXS + 1 # means 32 edges, final token is the target vertex\n",
     "PAD_TOKEN = 0\n",
     "EPOCH_SIZE = 2**20\n",
-    "# vertices are labelled 1,2,...,63\n",
+    "# vertices are labelled 1,2,...,MAX_VTXS\n",
     "# we also have a padding token which is 0."
    ]
   },
@@ -110,64 +99,18 @@
     "        u = indices[i]\n",
     "        v = indices[i + 1]\n",
     "        if u != v:\n",
-    "            edge_list += [min(u,v),max(u,v)]\n",
+    "            if u < v:\n",
+    "                edge_list += [u, v]\n",
+    "            else:\n",
+    "                edge_list += [v, u]\n",
     "            adjacencies[u].add(v)\n",
     "            adjacencies[v].add(u)\n",
     "    edge_list += [PAD_TOKEN]*(2*SEQ_LEN-1-len(edge_list))\n",
     "    return edge_list, adjacencies\n",
     "\n",
-    "# input: G, represented as an adjacency list\n",
-    "# output: [number of vertices]+[d(1,i) for i in range(n)] if target=None\n",
-    "# if target is set to some value, then we instead just output that specific distance\n",
-    "def SSSP(G, target=2):\n",
-    "    dist = [MAX_VTXS for _ in G]\n",
-    "    dist[1] = 0\n",
-    "    frontier = deque()\n",
-    "    frontier.append(1)\n",
-    "    while len(frontier) > 0:\n",
-    "        vtx = frontier.popleft()\n",
-    "        for x in G[vtx]:\n",
-    "            if dist[x] == MAX_VTXS:\n",
-    "                dist[x] = 1 + dist[vtx]\n",
-    "                frontier.append(x)\n",
-    "                if x == target:\n",
-    "                    return dist[target]\n",
-    "    if target is not None:\n",
-    "        return dist[target]\n",
-    "    else:\n",
-    "        return dist\n",
-    "\n",
-    "def mkbatch(size):\n",
-    "    graphs1 = []\n",
-    "    distance1 = []\n",
-    "    \n",
-    "    for i in range(size):\n",
-    "        n = random.randint(MIN_VTXS, MAX_VTXS)\n",
-    "        edge_list, adj_list = random_graph(n)\n",
-    "        dist = SSSP(adj_list)\n",
-    "        edge_list[-1] = 2 # target token\n",
-    "        graphs1.append(edge_list)\n",
-    "        distance1.append(dist)\n",
-    "    \n",
-    "    data = torch.tensor(graphs1)\n",
-    "    labels = torch.tensor(distance1, dtype=torch.float32)\n",
-    "    padding = data == PAD_TOKEN\n",
-    "    return data, labels, padding\n",
-    "\n",
-    "def savebatch(size, idx):\n",
-    "    data, labels, padding = mkbatch(size)\n",
-    "    everything = {\n",
-    "        \"data\": data,\n",
-    "        \"labels\": labels,\n",
-    "        \"padding\": padding,\n",
-    "    }\n",
-    "    \n",
-    "    with open(f'data31/{idx}.pickle', 'wb') as file:\n",
-    "        pickle.dump(everything, file)\n",
-    "\n",
-    "def vertices_on_shortest_12_path(G, target=2):\n",
-    "    dist = [MAX_VTXS for _ in G]\n",
-    "    parent = [-1 for _ in G]\n",
+    "def SSSP(G, target=2, distonly=True):\n",
+    "    dist = [MAX_VTXS] * len(G)\n",
+    "    parent = [-1] * len(G)\n",
     "    dist[1] = 0\n",
     "    frontier = deque()\n",
     "    frontier.append(1)\n",
@@ -179,41 +122,68 @@
     "                dist[x] = 1 + dist[vtx]\n",
     "                frontier.append(x)\n",
     "                if x == target:\n",
+    "                    if distonly:\n",
+    "                        return dist[x]\n",
     "                    path = [x]\n",
     "                    while parent[x] != -1:\n",
     "                        x = parent[x]\n",
     "                        path.append(x)\n",
     "                    return list(reversed(path))\n",
-    "    return []\n",
+    "    return MAX_VTXS if distonly else []\n",
     "\n",
-    "def mktunebatch(size, test=False):\n",
+    "# large: size up to MAX_VTXS instead of MAX_TUNE_VTXS\n",
+    "# target: 2, \"onpath\", \"any\"\n",
+    "# largetarget: whether can go up to MAX_VTXS or just MAX_TUNE_VTXS\n",
+    "def mkbatch(size, large=True, target=2, largetarget=True):\n",
     "    graphs = []\n",
-    "    distance = []\n",
+    "    dists = []\n",
     "    \n",
     "    for i in range(size):\n",
-    "        n = random.randint(MIN_VTXS, MAX_VTXS if test else MAX_TUNE_VTXS)\n",
+    "        n = random.randint(MIN_VTXS, MAX_VTXS if large else MAX_TUNE_VTXS)\n",
     "        while True:\n",
     "            edge_list, adj_list = random_graph(n)\n",
-    "            path = vertices_on_shortest_12_path(adj_list)\n",
-    "            if len(path) > 1:\n",
-    "                target_vtx_idx = random.randrange(1, len(path))\n",
-    "                target_vtx = path[target_vtx_idx]\n",
-    "                if target_vtx > MAX_TUNE_VTXS:\n",
+    "            if target == 2:\n",
+    "                # Train data\n",
+    "                dist = SSSP(adj_list)\n",
+    "                # Filter out some short distance graphs\n",
+    "                if 6 <= dist < MAX_VTXS or (dist < 6 and 2 * random.random() < 1 / (6 - dist)) or (dist == MAX_VTXS and random.random() < 1 / MAX_VTXS):\n",
+    "                    edge_list[-1] = 2\n",
+    "                    break\n",
+    "            elif target == \"onpath\":\n",
+    "                path = SSSP(adj_list, 2, False)\n",
+    "                if len(path) < 1:\n",
     "                    continue\n",
-    "                edge_list[-1] = target_vtx\n",
-    "                graphs.append(edge_list)\n",
-    "                distance.append(target_vtx_idx)\n",
+    "                dist = random.randrange(1, len(path))\n",
+    "                if largetarget or path[dist] <= MAX_TUNE_VTXS:\n",
+    "                    edge_list[-1] = path[dist]\n",
+    "                    break\n",
+    "            elif target == \"any\":\n",
+    "                edge_list[-1] = random.randint(2, MAX_VTXS if largetarget else MAX_TUNE_VTXS)\n",
+    "                dist = SSSP(adj_list, edge_list[-1])\n",
     "                break\n",
+    "        graphs.append(edge_list)\n",
+    "        dists.append(dist)\n",
     "    \n",
     "    data = torch.tensor(graphs)\n",
-    "    labels = torch.tensor(distance, dtype=torch.float32)\n",
+    "    labels = torch.tensor(dists, dtype=torch.float32)\n",
     "    padding = data == PAD_TOKEN\n",
-    "    return data, labels, padding"
+    "    return data, labels, padding\n",
+    "\n",
+    "def savebatch(size, idx):\n",
+    "    data, labels, padding = mkbatch(size)\n",
+    "    everything = {\n",
+    "        \"data\": data,\n",
+    "        \"labels\": labels,\n",
+    "        \"padding\": padding,\n",
+    "    }\n",
+    "    \n",
+    "    with open(f'data-new/{idx}.pickle', 'wb') as file:\n",
+    "        pickle.dump(everything, file)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "execution_state": "idle",
    "metadata": {},
    "outputs": [],
@@ -223,60 +193,48 @@
     "# Python is slow and awful\n",
     "\n",
     "# with ProcessPoolExecutor() as executor:\n",
-    "#     for i in range(1000):\n",
+    "#     for i in range(64):\n",
     "#         executor.submit(savebatch, EPOCH_SIZE, i)\n",
     "#     executor.shutdown()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 95,
    "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(tensor([[ 1,  2,  5, 12,  3, 12,  2, 11,  9, 12,  2, 10,  1,  7,  1,  2,  9, 10,\n",
-       "           1,  9,  4, 12,  0,  0,  0,  0,  0,  0,  0,  0,  2],\n",
-       "         [ 9, 12,  4,  7,  8, 10,  5, 13,  1, 13,  3, 13,  7, 12,  5,  6,  3,  4,\n",
-       "           6, 13,  2,  7,  0,  0,  0,  0,  0,  0,  0,  0,  2],\n",
-       "         [ 1,  5,  8, 12,  2,  9,  2,  7,  5,  9, 10, 11,  6, 10,  4, 12,  1,  2,\n",
-       "           4, 11,  2,  5,  2,  4,  0,  0,  0,  0,  0,  0,  2],\n",
-       "         [ 5,  8,  3,  6,  4,  5,  2,  3,  4,  9,  3,  8,  5,  7,  4,  9,  0,  0,\n",
-       "           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2],\n",
-       "         [ 6, 13,  1, 13,  1,  4,  6, 13,  5,  7,  2,  4, 10, 12,  4,  6,  8, 11,\n",
-       "           7, 11,  3,  8,  3,  5,  4, 12,  0,  0,  0,  0,  2]]),\n",
-       " tensor([ 1.,  5.,  1., 15.,  2.]),\n",
-       " tensor([[False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False,  True,  True,  True,  True,  True,  True,  True,  True,\n",
-       "          False],\n",
-       "         [False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False,  True,  True,  True,  True,  True,  True,  True,  True,\n",
-       "          False],\n",
-       "         [False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False, False, False,  True,  True,  True,  True,  True,  True,\n",
+       "(tensor([[1, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "          0, 0, 0, 0, 0, 0, 2],\n",
+       "         [1, 5, 3, 4, 1, 3, 4, 5, 3, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "          0, 0, 0, 0, 0, 0, 2],\n",
+       "         [2, 9, 1, 3, 4, 8, 8, 9, 3, 9, 5, 7, 6, 7, 1, 6, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "          0, 0, 0, 0, 0, 0, 2]]),\n",
+       " tensor([ 1., 15.,  3.]),\n",
+       " tensor([[False, False, False, False,  True,  True,  True,  True,  True,  True,\n",
+       "           True,  True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
+       "           True,  True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
        "          False],\n",
        "         [False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False, False, False, False, False,  True,  True,  True,  True,\n",
+       "           True,  True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
        "           True,  True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
        "          False],\n",
        "         [False, False, False, False, False, False, False, False, False, False,\n",
-       "          False, False, False, False, False, False, False, False, False, False,\n",
        "          False, False, False, False, False, False,  True,  True,  True,  True,\n",
+       "           True,  True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
        "          False]]))"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 95,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "mkbatch(5)"
+    "mkbatch(3)"
    ]
   },
   {
@@ -287,34 +245,42 @@
    "outputs": [
     {
      "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAGdCAYAAAAMm0nCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAqu0lEQVR4nO3de3BUZZ7/8U8upAmX7kg03WQJkFlcIYgXwIUe1F00Sw/TuroEZ3AQWUAtqEZNsgsxu5hx0DGIowgqZPBCmBpYhSpxJCnAGCCsQ7gYNg6gRFyjYSZ2x11NGhhJQtK/P36Ts7SCkpCkecL7VXWq7PN8zznfpyukP56cczoqFAqFBAAAYJDoSDcAAADQXgQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxYiPdQFdpbW1VbW2t+vfvr6ioqEi3AwAAzkMoFNLx48eVnJys6Ohzn2fpsQGmtrZWKSkpkW4DAAB0wLFjxzRo0KBzjvfYANO/f3/pL2+A3W6PdDsAAOA8BINBpaSkWJ/j59JjA0zbn43sdjsBBgAAw3zf5R9cxAsAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgnNhIN2CioY8UR+S4ny7xRuS4AABcbDgDAwAAjEOAAQAAxiHAAAAA4xBgAACAcdoVYIYOHaqoqKhvLT6fT5J06tQp+Xw+JSYmql+/fsrIyFAgEAjbR01Njbxer/r06aOkpCQtWLBAp0+fDqvZuXOnRo8eLZvNpmHDhqmwsLAz5goAAHqIdgWY/fv36/PPP7eWkpISSdJdd90lScrKytLmzZu1ceNGlZWVqba2VlOmTLG2b2lpkdfrVVNTk3bv3q21a9eqsLBQeXl5Vk11dbW8Xq8mTpyoyspKZWZm6r777tO2bds6b9YAAMBoUaFQKNTRjTMzM1VUVKSjR48qGAzqiiuu0Pr16zV16lRJ0pEjRzRixAiVl5dr/Pjx2rJli2677TbV1tbK6XRKkgoKCpSTk6MvvvhCcXFxysnJUXFxsQ4dOmQdZ9q0aaqvr9fWrVvPu7dgMCiHw6GGhgbZ7faOTvGsuI0aAICucb6f3x2+BqapqUm//e1vNXv2bEVFRamiokLNzc1KT0+3aoYPH67BgwervLxcklReXq5Ro0ZZ4UWSPB6PgsGgDh8+bNWcuY+2mrZ9nEtjY6OCwWDYAgAAeqYOB5g333xT9fX1+ud//mdJkt/vV1xcnBISEsLqnE6n/H6/VXNmeGkbbxv7rppgMKivv/76nP3k5+fL4XBYS0pKSkenBgAALnIdDjCvvPKKJk+erOTk5M7tqINyc3PV0NBgLceOHYt0SwAAoIt06KsEPvvsM73zzjt64403rHUul0tNTU2qr68POwsTCATkcrmsmn379oXtq+0upTNrvnnnUiAQkN1uV3x8/Dl7stlsstlsHZkOAAAwTIfOwKxZs0ZJSUnyev/votIxY8aoV69eKi0ttdZVVVWppqZGbrdbkuR2u3Xw4EHV1dVZNSUlJbLb7UpLS7NqztxHW03bPgAAANodYFpbW7VmzRrNnDlTsbH/dwLH4XBozpw5ys7O1o4dO1RRUaFZs2bJ7XZr/PjxkqRJkyYpLS1NM2bM0Pvvv69t27Zp0aJF8vl81tmTuXPn6pNPPtHChQt15MgRrVy5Uhs2bFBWVlZnzhsAABis3X9Ceuedd1RTU6PZs2d/a2zZsmWKjo5WRkaGGhsb5fF4tHLlSms8JiZGRUVFmjdvntxut/r27auZM2dq8eLFVk1qaqqKi4uVlZWl5cuXa9CgQXr55Zfl8XguZJ4AAKAHuaDnwFzMeA4MAADm6fLnwAAAAEQKAQYAABiHAAMAAIzToefAAACAi8Olel0mZ2AAAIBxCDAAAMA4BBg
AAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDh8lYBBIvW4aF0Ej4wGAOBMnIEBAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYp90B5k9/+pPuueceJSYmKj4+XqNGjdJ7771njYdCIeXl5WngwIGKj49Xenq6jh49GraPL7/8UtOnT5fdbldCQoLmzJmjEydOhNX84Q9/0E033aTevXsrJSVFS5cuvZB5AgCAHqRdAearr77ShAkT1KtXL23ZskUffPCBnnnmGV122WVWzdKlS7VixQoVFBRo79696tu3rzwej06dOmXVTJ8+XYcPH1ZJSYmKioq0a9cuPfDAA9Z4MBjUpEmTNGTIEFVUVOjpp5/WY489ptWrV3fWvAEAgMFi21P81FNPKSUlRWvWrLHWpaamWv8dCoX03HPPadGiRbrjjjskSb/5zW/kdDr15ptvatq0afrwww+1detW7d+/X2PHjpUkPf/88/rxj3+sX/3qV0pOTta6devU1NSkV199VXFxcRo5cqQqKyv17LPPhgUdAABwaWrXGZi33npLY8eO1V133aWkpCRdf/31eumll6zx6upq+f1+paenW+scDofGjRun8vJySVJ5ebkSEhKs8CJJ6enpio6O1t69e62am2++WXFxcVaNx+NRVVWVvvrqq7P21tjYqGAwGLYAAICeqV0B5pNPPtGqVat05ZVXatu2bZo3b54eeughrV27VpLk9/slSU6nM2w7p9Npjfn9fiUlJYWNx8bGasCAAWE1Z9vHmcf4pvz8fDkcDmtJSUlpz9QAAIBB2hVgWltbNXr0aD355JO6/vrr9cADD+j+++9XQUFB13V4nnJzc9XQ0GAtx44di3RLAACgi7QrwAwcOFBpaWlh60aMGKGamhpJksvlkiQFAoGwmkAgYI25XC7V1dWFjZ8+fVpffvllWM3Z9nHmMb7JZrPJbreHLQAAoGdqV4CZMGGCqqqqwtZ99NFHGjJkiPSXC3pdLpdKS0ut8WAwqL1798rtdkuS3G636uvrVVFRYdVs375dra2tGjdunFWza9cuNTc3WzUlJSW66qqrwu54AgAAl6Z2BZisrCzt2bNHTz75pD7++GOtX79eq1evls/nkyRFRUUpMzNTTzzxhN566y0dPHhQ9957r5KTk3XnnXdKfzlj86Mf/Uj333+/9u3bp9///veaP3++pk2bpuTkZEnSz372M8XFxWnOnDk6fPiwXn/9dS1fvlzZ2dld8R4AAADDtOs26htuuEGbNm1Sbm6uFi9erNTUVD333HOaPn26VbNw4UKdPHlSDzzwgOrr63XjjTdq69at6t27t1Wzbt06zZ8/X7feequio6OVkZGhFStWWOMOh0Nvv/22fD6fxowZo8svv1x5eXncQg0AACRJUaFQKBTpJrpCMBiUw+FQQ0NDp18PM/SR4k7dnwk+XeKNdAsAgLOI1GdSV30unO/nN9+FBAAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGCcdgWYxx57TFFRUWHL8OHDrfFTp07J5/MpMTFR/fr1U0ZGhgKBQNg+ampq5PV61adPHyUlJWn
BggU6ffp0WM3OnTs1evRo2Ww2DRs2TIWFhRc6TwAA0IO0+wzMyJEj9fnnn1vLu+++a41lZWVp8+bN2rhxo8rKylRbW6spU6ZY4y0tLfJ6vWpqatLu3bu1du1aFRYWKi8vz6qprq6W1+vVxIkTVVlZqczMTN13333atm1bZ8wXAAD0ALHt3iA2Vi6X61vrGxoa9Morr2j9+vW65ZZbJElr1qzRiBEjtGfPHo0fP15vv/22PvjgA73zzjtyOp267rrr9PjjjysnJ0ePPfaY4uLiVFBQoNTUVD3zzDOSpBEjRujdd9/VsmXL5PF4OmPOAADAcO0+A3P06FElJyfrBz/4gaZPn66amhpJUkVFhZqbm5Wenm7VDh8+XIMHD1Z5ebkkqby8XKNGjZLT6bRqPB6PgsGgDh8+bNWcuY+2mrZ9nEtjY6OCwWDYAgAAeqZ2BZhx48apsLBQW7du1apVq1RdXa2bbrpJx48fl9/vV1xcnBISEsK2cTqd8vv9kiS/3x8WXtrG28a+qyYYDOrrr78+Z2/5+flyOBzWkpKS0p6pAQAAg7TrT0iTJ0+2/vuaa67RuHHjNGTIEG3YsEHx8fFd0d95y83NVXZ2tvU6GAwSYgAA6KEu6DbqhIQE/c3f/I0+/vhjuVwuNTU1qb6+PqwmEAhY18y4XK5v3ZXU9vr7aux2+3eGJJvNJrvdHrYAAICe6YICzIkTJ/Tf//3fGjhwoMaMGaNevXqptLTUGq+qqlJNTY3cbrckye126+DBg6qrq7NqSkpKZLfblZaWZtWcuY+2mrZ9AAAAtCvA/Ou//qvKysr06aefavfu3fqnf/onxcTE6O6775bD4dCcOXOUnZ2tHTt2qKKiQrNmzZLb7db48eMlSZMmTVJaWppmzJih999/X9u2bdOiRYvk8/lks9kkSXPnztUnn3yihQsX6siRI1q5cqU2bNigrKysrnkHAACAcdp1Dcwf//hH3X333frf//1fXXHFFbrxxhu1Z88eXXHFFZKkZcuWKTo6WhkZGWpsbJTH49HKlSut7WNiYlRUVKR58+bJ7Xarb9++mjlzphYvXmzVpKamqri4WFlZWVq+fLkGDRqkl19+mVuoAQCAJSoUCoUi3URXCAaDcjgcamho6PTrYYY+Utyp+zPBp0u8kW4BAHAWkfpM6qrPhfP9/Oa7kAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgnHZ9mSMuXT3tuzYAAGbjDAwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABjnggLMkiVLFBUVpczMTGvdqVOn5PP5lJiYqH79+ikjI0OBQCBsu5qaGnm9XvXp00dJSUlasGCBTp8+HVazc+dOjR49WjabTcOGDVNhYeGFtAoAAHqQDgeY/fv369e//rWuueaasPVZWVnavHmzNm7cqLKyMtXW1mrKlCnWeEtLi7xer5qamrR7926tXbtWhYWFysvLs2qqq6vl9Xo1ceJEVVZWKjMzU/fdd5+2bdvW0XYBAEAP0qEAc+LECU2fPl0vvfSSLrvsMmt9Q0ODXnnlFT377LO65ZZbNGbMGK1Zs0a7d+/Wnj17JElvv/22PvjgA/32t7/Vddddp8mTJ+vxxx/Xiy++qKamJklSQUGBUlNT9cwzz2jEiBGaP3++pk6dqmXLlnXWvAEAgME6FGB8Pp+8Xq/S09PD1ldUVKi5uTls/fDhwzV48GCVl5dLksrLyzVq1Cg5nU6rxuPxKBgM6vDhw1bNN/ft8XisfZxNY2OjgsFg2AIAAHqm2PZu8Np
rr+nAgQPav3//t8b8fr/i4uKUkJAQtt7pdMrv91s1Z4aXtvG2se+qCQaD+vrrrxUfH/+tY+fn5+sXv/hFe6cDAAAM1K4zMMeOHdPDDz+sdevWqXfv3l3XVQfk5uaqoaHBWo4dOxbplgAAQBdpV4CpqKhQXV2dRo8erdjYWMXGxqqsrEwrVqxQbGysnE6nmpqaVF9fH7ZdIBCQy+WSJLlcrm/dldT2+vtq7Hb7Wc++SJLNZpPdbg9bAABAz9SuAHPrrbfq4MGDqqystJaxY8dq+vTp1n/36tVLpaWl1jZVVVWqqamR2+2WJLndbh08eFB1dXVWTUlJiex2u9LS0qyaM/fRVtO2DwAAcGlr1zUw/fv319VXXx22rm/fvkpMTLTWz5kzR9nZ2RowYIDsdrsefPBBud1ujR8/XpI0adIkpaWlacaMGVq6dKn8fr8WLVokn88nm80mSZo7d65eeOEFLVy4ULNnz9b27du1YcMGFRcXd97MAQCAsdp9Ee/3WbZsmaKjo5WRkaHGxkZ5PB6tXLnSGo+JiVFRUZHmzZsnt9utvn37aubMmVq8eLFVk5qaquLiYmVlZWn58uUaNGiQXn75ZXk8ns5uFwAAGCgqFAqFIt1EVwgGg3I4HGpoaOj062GGPsKZoO7y6RJvpFsAgItapD6Tuur38/l+fvNdSAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGaVeAWbVqla655hrZ7XbZ7Xa53W5t2bLFGj916pR8Pp8SExPVr18/ZWRkKBAIhO2jpqZGXq9Xffr0UVJSkhYsWKDTp0+H1ezcuVOjR4+WzWbTsGHDVFhYeKHzBAAAPUi7AsygQYO0ZMkSVVRU6L333tMtt9yiO+64Q4cPH5YkZWVlafPmzdq4caPKyspUW1urKVOmWNu3tLTI6/WqqalJu3fv1tq1a1VYWKi8vDyrprq6Wl6vVxMnTlRlZaUyMzN13333adu2bZ05bwAAYLCoUCgUupAdDBgwQE8//bSmTp2qK664QuvXr9fUqVMlSUeOHNGIESNUXl6u8ePHa8uWLbrttttUW1srp9MpSSooKFBOTo6++OILxcXFKScnR8XFxTp06JB1jGnTpqm+vl5bt249776CwaAcDocaGhpkt9svZIrfMvSR4k7dH87t0yXeSLcAABe1SH0mddXv5/P9/I7t6AFaWlq0ceNGnTx5Um63WxUVFWpublZ6erpVM3z4cA0ePNgKMOXl5Ro1apQVXiTJ4/Fo3rx5Onz4sK6//nqVl5eH7aOtJjMz8zv7aWxsVGNjo/U6GAx2dGq4iPS0f5gAgM7R7ot4Dx48qH79+slms2nu3LnatGmT0tLS5Pf7FRcXp4SEhLB6p9Mpv98vSfL7/WHhpW28bey7aoLBoL7++utz9pWfny+Hw2EtKSkp7Z0aAAAwRLsDzFVXXaXKykrt3btX8+bN08yZM/XBBx90TXftkJubq4aGBms5duxYpFsCAABdpN1/QoqLi9OwYcMkSWPGjNH+/fu1fPly/fSnP1VTU5Pq6+vDzsIEAgG5XC5Jksvl0r59+8L213aX0pk137xzKRAIyG63Kz4+/px92Ww22Wy29k4HAAAY6IKfA9Pa2qrGxkaNGTNGvXr1UmlpqTVWVVWlmpoaud1uSZLb7dbBgwdVV1dn1ZSUlMhutystLc2qOXMfbTVt+wAAAGjXGZjc3FxNnjxZgwcP1vHjx7V+/Xrt3LlT27Ztk8P
h0Jw5c5Sdna0BAwbIbrfrwQcflNvt1vjx4yVJkyZNUlpammbMmKGlS5fK7/dr0aJF8vl81tmTuXPn6oUXXtDChQs1e/Zsbd++XRs2bFBxMXf+AACA/69dAaaurk733nuvPv/8czkcDl1zzTXatm2b/uEf/kGStGzZMkVHRysjI0ONjY3yeDxauXKltX1MTIyKioo0b948ud1u9e3bVzNnztTixYutmtTUVBUXFysrK0vLly/XoEGD9PLLL8vj8XTmvAEAgMEu+DkwFyueA4MLwW3UAEzR0x43cb6f33wXEgAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA48RGugHgYjT0keKIHPfTJd6IHBcATMMZGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOO0KMPn5+brhhhvUv39/JSUl6c4771RVVVVYzalTp+Tz+ZSYmKh+/fopIyNDgUAgrKampkZer1d9+vRRUlKSFixYoNOnT4fV7Ny5U6NHj5bNZtOwYcNUWFh4IfMEAAA9SLsCTFlZmXw+n/bs2aOSkhI1Nzdr0qRJOnnypFWTlZWlzZs3a+PGjSorK1Ntba2mTJlijbe0tMjr9aqpqUm7d+/W2rVrVVhYqLy8PKumurpaXq9XEydOVGVlpTIzM3Xfffdp27ZtnTVvAABgsKhQKBTq6MZffPGFkpKSVFZWpptvvlkNDQ264oortH79ek2dOlWSdOTIEY0YMULl5eUaP368tmzZottuu021tbVyOp2SpIKCAuXk5OiLL75QXFyccnJyVFxcrEOHDlnHmjZtmurr67V169bz6i0YDMrhcKihoUF2u72jUzyrSD1mHj0fXyUAoL162lefnO/n9wVdA9PQ0CBJGjBggCSpoqJCzc3NSk9Pt2qGDx+uwYMHq7y8XJJUXl6uUaNGWeFFkjwej4LBoA4fPmzVnLmPtpq2fZxNY2OjgsFg2AIAAHqmDgeY1tZWZWZmasKECbr66qslSX6/X3FxcUpISAirdTqd8vv9Vs2Z4aVtvG3su2qCwaC+/vrrs/aTn58vh8NhLSkpKR2dGgAAuMh1OMD4fD4dOnRIr732Wud21EG5ublqaGiwlmPHjkW6JQAA0EViO7LR/PnzVVRUpF27dmnQoEHWepfLpaamJtXX14edhQkEAnK5XFbNvn37wvbXdpfSmTXfvHMpEAjIbrcrPj7+rD3ZbDbZbLaOTAcAABimXWdgQqGQ5s+fr02bNmn79u1KTU0NGx8zZox69eql0tJSa11VVZVqamrkdrslSW63WwcPHlRdXZ1VU1JSIrvdrrS0NKvmzH201bTtAwAAXNradQbG5/Np/fr1+t3vfqf+/ftb16w4HA7Fx8fL4XBozpw5ys7O1oABA2S32/Xggw/K7XZr/PjxkqRJkyYpLS1NM2bM0NKlS+X3+7Vo0SL5fD7rDMrcuXP1wgsvaOHChZo9e7a2b9+uDRs2qLiYu38AAEA7z8CsWrVKDQ0N+vu//3sNHDjQWl5//XWrZtmyZbrtttuUkZGhm2++WS6XS2+88YY1HhMTo6KiIsXExMjtduuee+7Rvffeq8WLF1s1qampKi4uVklJia699lo988wzevnll+XxeDpr3gAAwGAX9ByYixnPgYGJeA4MgPbiOTAAAACGIMAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOO068scAXStnvZIcADoKpyBAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAI
MAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcdodYHbt2qXbb79dycnJioqK0ptvvhk2HgqFlJeXp4EDByo+Pl7p6ek6evRoWM2XX36p6dOny263KyEhQXPmzNGJEyfCav7whz/opptuUu/evZWSkqKlS5d2dI4AAKCHaXeAOXnypK699lq9+OKLZx1funSpVqxYoYKCAu3du1d9+/aVx+PRqVOnrJrp06fr8OHDKikpUVFRkXbt2qUHHnjAGg8Gg5o0aZKGDBmiiooKPf3003rssce0evXqjs4TAAD0ILHt3WDy5MmaPHnyWcdCoZCee+45LVq0SHfccYck6Te/+Y2cTqfefPNNTZs2TR9++KG2bt2q/fv3a+zYsZKk559/Xj/+8Y/1q1/9SsnJyVq3bp2ampr06quvKi4uTiNHjlRlZaWeffbZsKADAAAuTZ16DUx1dbX8fr/S09OtdQ6HQ+PGjVN5ebkkqby8XAkJCVZ4kaT09HRFR0dr7969Vs3NN9+suLg4q8bj8aiqqkpfffXVWY/d2NioYDAYtgAAgJ6pUwOM3++XJDmdzrD1TqfTGvP7/UpKSgobj42N1YABA8JqzraPM4/xTfn5+XI4HNaSkpLSiTMDAAAXkx5zF1Jubq4aGhqs5dixY5FuCQAAdJFODTAul0uSFAgEwtYHAgFrzOVyqa6uLmz89OnT+vLLL8NqzraPM4/xTTabTXa7PWwBAAA9U6cGmNTUVLlcLpWWllrrgsGg9u7dK7fbLUlyu92qr69XRUWFVbN9+3a1trZq3LhxVs2uXbvU3Nxs1ZSUlOiqq67SZZdd1pktAwAAA7U7wJw4cUKVlZWqrKyU/nLhbmVlpWpqahQVFaXMzEw98cQTeuutt3Tw4EHde++9Sk5O1p133ilJGjFihH70ox/p/vvv1759+/T73/9e8+fP17Rp05ScnCxJ+tnPfqa4uDjNmTNHhw8f1uuvv67ly5crOzu7s+cPAAAM1O7bqN977z1NnDjRet0WKmbOnKnCwkItXLhQJ0+e1AMPPKD6+nrdeOON2rp1q3r37m1ts27dOs2fP1+33nqroqOjlZGRoRUrVljjDodDb7/9tnw+n8aMGaPLL79ceXl53EINAAAkSVGhUCgU6Sa6QjAYlMPhUENDQ6dfDzP0keJO3R8QaZ8u8Ua6BQAdFKnPpK76vXG+n9895i4kAABw6SDAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOPERroBAJE39JHiiBz30yXeiBwXgPk4AwMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMExvpBgBcuoY+UhyR4366xBuR4wLoPJyBAQAAxrmoA8yLL76ooUOHqnfv3ho3bpz27dsX6ZYAAMBF4KINMK+//rqys7P185//XAcOHNC1114rj8ejurq6SLcGAAAi7KINMM8++6zuv/9+zZo1S2lpaSooKFCfPn306quvRro1AAAQYRflRbxNTU2qqKhQbm6utS46Olrp6ekqLy8/6zaNjY1qbGy0Xjc0NEiSgsFgp/fX2vjnTt8ngO4zOGtjRI576BeeiBwXPVukPpO64vP1zP2GQqHvrLsoA8z//M//qKWlRU6nM2y90+nUkSNHzrpNfn6+fvGLX3xrfUpKSpf1CQDt4Xgu0h0Anaerf56PHz8
uh8NxzvGLMsB0RG5urrKzs63Xra2t+vLLL5WYmKioqKiI9tbZgsGgUlJSdOzYMdnt9ki30+2Y/6U9f/EeXPLzF+9Bj55/KBTS8ePHlZyc/J11F2WAufzyyxUTE6NAIBC2PhAIyOVynXUbm80mm80Wti4hIaFL+4w0u93e435w24P5X9rzF+/BJT9/8R702Pl/15mXNhflRbxxcXEaM2aMSktLrXWtra0qLS2V2+2OaG8AACDyLsozMJKUnZ2tmTNnauzYsfrbv/1bPffcczp58qRmzZoV6dYAAECEXbQB5qc//am++OIL5eXlye/367rrrtPWrVu/dWHvpchms+nnP//5t/5kdqlg/pf2/MV7cMnPX7wHl/z8JSkq9H33KQEAAFxkLsprYAAAAL4LAQYAABiHAAMAAIxDgAEAAMYhwBgiPz9fN9xwg/r376+kpCTdeeedqqqqinRbEbNkyRJFRUUpMzMz0q10qz/96U+65557lJiYqPj4eI0aNUrvvfdepNvqFi0tLXr00UeVmpqq+Ph4/fVf/7Uef/zx7/2+FJPt2rVLt99+u5KTkxUVFaU333wzbDwUCikvL08DBw5UfHy80tPTdfTo0Yj129m+a/7Nzc3KycnRqFGj1LdvXyUnJ+vee+9VbW1tRHvubN/3M3CmuXPnKioqSs89d2l8ZwUBxhBlZWXy+Xzas2ePSkpK1NzcrEmTJunkyZORbq3b7d+/X7/+9a91zTXXRLqVbvXVV19pwoQJ6tWrl7Zs2aIPPvhAzzzzjC677LJIt9YtnnrqKa1atUovvPCCPvzwQz311FNaunSpnn/++Ui31mVOnjypa6+9Vi+++OJZx5cuXaoVK1aooKBAe/fuVd++feXxeHTq1Klu77UrfNf8//znP+vAgQN69NFHdeDAAb3xxhuqqqrSP/7jP0ak167yfT8DbTZt2qQ9e/Z87+P3e5QQjFRXVxeSFCorK4t0K93q+PHjoSuvvDJUUlIS+ru/+7vQww8/HOmWuk1OTk7oxhtvjHQbEeP1ekOzZ88OWzdlypTQ9OnTI9ZTd5IU2rRpk/W6tbU15HK5Qk8//bS1rr6+PmSz2UL/8R//EaEuu8435382+/btC0kKffbZZ93WV3c613vwxz/+MfRXf/VXoUOHDoWGDBkSWrZsWUT6626cgTFUQ0ODJGnAgAGRbqVb+Xw+eb1epaenR7qVbvfWW29p7Nixuuuuu5SUlKTrr79eL730UqTb6jY//OEPVVpaqo8++kiS9P777+vdd9/V5MmTI91aRFRXV8vv94f9W3A4HBo3bpzKy8sj2lukNDQ0KCoqqsd/D96ZWltbNWPGDC1YsEAjR46MdDvd6qJ9Ei/OrbW1VZmZmZowYYKuvvrqSLfTbV577TUdOHBA+/fvj3QrEfHJJ59o1apVys7O1r/9279p//79euihhxQXF6eZM2dGur0u98gjjygYDGr48OGKiYlRS0uLfvnLX2r69OmRbi0i/H6/JH3r6eROp9Mau5ScOnVKOTk5uvvuu3vklxuey1NPPaXY2Fg99NBDkW6l2xFgDOTz+XTo0CG9++67kW6l2xw7dkwPP/ywSkpK1Lt370i3ExGtra0aO3asnnzySUnS9ddfr0OHDqmgoOCSCDAbNmzQunXrtH79eo0cOVKVlZXKzMxUcnLyJTF/nFtzc7N+8pOfKBQKadWqVZFup9tUVFRo+fLlOnDggKKioiLdTrfjT0iGmT9/voqKirRjxw4NGjQo0u10m4qKCtXV1Wn06NGKjY1VbGysysrKtGLFCsXGxqqlpSXSLXa5gQMHKi0tLWzdiBEjVFNTE7GeutOCBQv0yCOPaNq0aRo1apRmzJihrKws5efnR7q1iHC5XJKkQCAQtj4QCFhjl4K28PLZZ5+ppKTkkjr78p//+Z+qq6vT4MGDrd+Ln332mf7lX/5FQ4cOjXR7XY4zMIYIhUJ68MEHtWnTJu3cuVOpqamRbqlb3XrrrTp48GD
YulmzZmn48OHKyclRTExMxHrrLhMmTPjWrfMfffSRhgwZErGeutOf//xnRUeH/z9XTEyMWltbI9ZTJKWmpsrlcqm0tFTXXXedJCkYDGrv3r2aN29epNvrFm3h5ejRo9qxY4cSExMj3VK3mjFjxreuB/R4PJoxY4ZmzZoVsb66CwHGED6fT+vXr9fvfvc79e/f3/obt8PhUHx8fKTb63L9+/f/1vU+ffv2VWJi4iVzHVBWVpZ++MMf6sknn9RPfvIT7du3T6tXr9bq1asj3Vq3uP322/XLX/5SgwcP1siRI/Vf//VfevbZZzV79uxIt9ZlTpw4oY8//th6XV1drcrKSg0YMECDBw9WZmamnnjiCV155ZVKTU3Vo48+quTkZN15550R7buzfNf8Bw4cqKlTp+rAgQMqKipSS0uL9XtxwIABiouLi2Dnnef7fga+Gdp69eoll8ulq666KgLddrNI3waF8yPprMuaNWsi3VrEXGq3UYdCodDmzZtDV199dchms4WGDx8eWr16daRb6jbBYDD08MMPhwYPHhzq3bt36Ac/+EHo3//930ONjY2Rbq3L7Nix46z/7mfOnBkK/eVW6kcffTTkdDpDNpstdOutt4aqqqoi3Xan+a75V1dXn/P34o4dOyLdeqf5vp+Bb7qUbqOOCvXkx1gCAIAeiYt4AQCAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADDO/wPP0WtNPThbKQAAAABJRU5ErkJggg==",
       "text/plain": [
-       "(array([523.,   0.,   0.,   0., 390.,   0.,   0.,   0.,   0., 238.,   0.,\n",
-       "          0.,   0.,  92.,   0.,   0.,   0.,   0.,  40.,   0.,   0.,   0.,\n",
-       "         15.,   0.,   0.,   0.,   0.,   5.,   0.,   0.,   0.,   0.,   2.,\n",
-       "          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,\n",
-       "          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,\n",
-       "          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0., 743.]),\n",
-       " array([ 1.     ,  1.21875,  1.4375 ,  1.65625,  1.875  ,  2.09375,\n",
-       "         2.3125 ,  2.53125,  2.75   ,  2.96875,  3.1875 ,  3.40625,\n",
-       "         3.625  ,  3.84375,  4.0625 ,  4.28125,  4.5    ,  4.71875,\n",
-       "         4.9375 ,  5.15625,  5.375  ,  5.59375,  5.8125 ,  6.03125,\n",
-       "         6.25   ,  6.46875,  6.6875 ,  6.90625,  7.125  ,  7.34375,\n",
-       "         7.5625 ,  7.78125,  8.     ,  8.21875,  8.4375 ,  8.65625,\n",
-       "         8.875  ,  9.09375,  9.3125 ,  9.53125,  9.75   ,  9.96875,\n",
-       "        10.1875 , 10.40625, 10.625  , 10.84375, 11.0625 , 11.28125,\n",
-       "        11.5    , 11.71875, 11.9375 , 12.15625, 12.375  , 12.59375,\n",
-       "        12.8125 , 13.03125, 13.25   , 13.46875, 13.6875 , 13.90625,\n",
-       "        14.125  , 14.34375, 14.5625 , 14.78125, 15.     ]),\n",
-       " <BarContainer object of 64 artists>)"
+       "<Figure size 640x480 with 1 Axes>"
       ]
      },
-     "execution_count": 6,
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.hist(mkbatch(2**15)[1].cpu(), bins=MAX_VTXS)\n",
+    "with open(\"train-dist.html\", \"w\") as f:\n",
+    "    mpld3.save_html(plt.gcf(), f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "execution_state": "idle",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([2.4747e+04, 6.5340e+03, 1.2840e+03, 1.8300e+02, 2.0000e+01]),\n",
+       " array([1., 2., 3., 4., 5., 6.]),\n",
+       " <BarContainer object of 5 artists>)"
+      ]
+     },
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     },
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAl0UlEQVR4nO3df1RU953/8Rc/B0RnCGyYkRWQ7rpFEoyJpjIxu+0qK7WsW480qTmU0MSNp57RBGmtsqsm1UQMu42pXdSa46p7Gtat56xpxcQEiSFtBSS47jGaJWZrAgmZoWctjJrDgDDfP75l2vFHklFkPsDzcc49J3M/n2Hel2PMM8MME+H3+/0CAAAwSGS4BwAAALgSgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAONHhHuBGDAwMqKOjQxMmTFBERES4xwEAAJ+D3+/XhQsXlJqaqsjIT3+OZEQGSkdHh9LS0sI9BgAAuAHt7e2aNGnSp+4ZkYEyYcIE6fcXaLVawz0OAAD4HLxer9LS0gL/Hf80IzJQBn+sY7VaCRQAAEaYz/PyDF4kCwAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA40SHewAAABAek9ccuu7a+5sLhnWWK/EMCgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA44QUKJMnT1ZERMRVh8vlkiT19PTI5XIpOTlZ48ePV2FhoTweT9DXaGtrU0FBgcaNG6eUlBStWrVKly9fHtqrAgAAI1pIgdLc3KyPP/44cNTW1kqSHnjgAUnSypUrdfDgQe3fv1/19fXq6OjQokWLAvfv7+9XQUGBent7dezYMe3du1d79uzR+vXrh/q6AADACBbh9/v9N3rn0tJS1dTU6OzZs/J6vbr99ttVXV2tb3zjG5Kk//mf/9HUqVPV0NCg3NxcvfLKK/rbv/1bdXR0yG63S5J27Nih1atX67e//a1iY2M/1+N6vV7ZbDZ1d3fLarXe6PgAAIxpk9ccuu7a+5sLhvzxQvnv9w2/BqW3t1c//elP9eijjyoiIkItLS3q6+tTXl5eYE9WVpbS09PV0NAgSWpoaFBOTk4gTiQpPz9fXq9Xp0+fvtFRAADAKBN9o3d86aWX1NXVpW9/+9uSJLfbrdjYWCUmJgbts9vtcrvdgT1/HCeD64Nr1+Pz+eTz+QK3vV7vjY4NAABGgBt+BmXXrl2aP3++UlNTh3aia6ioqJDNZgscaWlpt/wxAQBA+NxQoHzwwQc6cuSI/v7v/z5wzuFwqLe3V11dXUF7PR6PHA5HYM+V7+oZvD2451rKy8vV3d0dONrb229kbAAAMELcUKDs3r1bKSkpKij4wwtoZsyYoZiYGNXV1QXOtba2qq2tTU6nU5LkdDp16tQpdXZ2BvbU1tbKarUqOzv7uo9nsVhktVqDDgAAMHqF/BqUgYEB7d69WyUlJYqO/sPdbTablixZorKyMiUlJclqtWrFihVyOp3Kzc2VJM2bN0/Z2dkqLi5WZWWl3G631q5dK5fLJYvFMrRXBgAARqyQA+XIkSNqa2vTo48+etXali1bFBkZqcLCQvl8PuXn52vbtm2B9aioKNXU1GjZsmVyOp1KSEhQSUmJNmzYcPNXAgAARo2b+j0o4cLvQQEA4OaNyt+DAgAAcKsQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA
4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOOEHCgfffSRvvWtbyk5OVnx8fHKycnRW2+9FVj3+/1av369Jk6cqPj4eOXl5ens2bNBX+P8+fMqKiqS1WpVYmKilixZoosXLw7NFQEAgBEvpED53e9+p9mzZysmJkavvPKKzpw5ox/+8Ie67bbbAnsqKyu1detW7dixQ01NTUpISFB+fr56enoCe4qKinT69GnV1taqpqZGb775ppYuXTq0VwYAAEasCL/f7/+8m9esWaNf//rX+uUvf3nNdb/fr9TUVH33u9/V9773PUlSd3e37Ha79uzZo8WLF+udd95Rdna2mpubNXPmTEnS4cOH9bWvfU0ffvihUlNTP3MOr9crm82m7u5uWa3Wz3+1AAAgYPKaQ9dde39zwZA/Xij//Q7pGZRf/OIXmjlzph544AGlpKTo7rvv1gsvvBBYP3funNxut/Ly8gLnbDabZs2apYaGBklSQ0ODEhMTA3EiSXl5eYqMjFRTU9M1H9fn88nr9QYdAABg9AopUH7zm99o+/btmjJlil599VUtW7ZMjz/+uPbu3StJcrvdkiS73R50P7vdHlhzu91KSUkJWo+OjlZSUlJgz5UqKipks9kCR1paWmhXCQAARpSQAmVgYED33HOPNm3apLvvvltLly7VY489ph07dty6CSWVl5eru7s7cLS3t9/SxwMAAOEVUqBMnDhR2dnZQeemTp2qtrY2SZLD4ZAkeTyeoD0ejyew5nA41NnZGbR++fJlnT9/PrDnShaLRVarNegAAACjV0iBMnv2bLW2tgade/fdd5WRkSFJyszMlMPhUF1dXWDd6/WqqalJTqdTkuR0OtXV1aWWlpbAntdff10DAwOaNWvWzV4PAAAYBaJD2bxy5Urdd9992rRpkx588EEdP35cO3fu1M6dOyVJERERKi0t1dNPP60pU6YoMzNT69atU2pqqhYuXCj9/hmXr371q4EfDfX19Wn58uVavHjx53oHDwAAGP1CCpR7771XBw4cUHl5uTZs2KDMzEw9//zzKioqCuz5/ve/r0uXLmnp0qXq6urS/fffr8OHDysuLi6w58UXX9Ty5cs1d+5cRUZGqrCwUFu3bh3aKwMAACNWSL8HxRT8HhQAAG7eqPk9KAAAAMOBQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxokO9wAmmrzm0HXX3t9cMKyzAAAwFvEMCgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwTkiB8tRTTykiIiLoyMrKCqz39PTI5XIpOTlZ48ePV2FhoTweT9DXaGtrU0FBgcaNG6eUlBStWrVKly9fHrorAgAAI17IbzO+4447dOTIkT98geg/fImVK1fq0KFD2r9/v2w2m5YvX65Fixbp17/+tSSpv79fBQUFcjgcOnbsmD7++GM9/PDDiomJ0aZNm4bqmgAAwAgXcqBER0fL4XBcdb67u1u7du1SdXW15syZI0navXu3pk6dqsbGRuXm5uq1117TmTNndOTIEdntdk2fPl0bN27U6tWr9dRTTyk2NnZorgoAAIxoIb8G5ezZs0pNTdUXvvAFFRUVqa2tTZLU0tKivr4+5eXlBfZmZWUpPT1dDQ0NkqSGhgbl5OTIbrcH9uTn58vr9er06dPXfUyfzyev1xt0AACA0SukQJk1a5b27Nmjw4cPa/v27Tp37pz+8i//UhcuXJDb7VZsbKwSExO
D7mO32+V2uyVJbrc7KE4G1wfXrqeiokI2my1wpKWlhTI2AAAYYUL6Ec/8+fMD/zxt2jTNmjVLGRkZ+tnPfqb4+PhbMZ8kqby8XGVlZYHbXq+XSAEAYBS7qbcZJyYm6i/+4i/03nvvyeFwqLe3V11dXUF7PB5P4DUrDofjqnf1DN6+1utaBlksFlmt1qADAACMXjcVKBcvXtT//u//auLEiZoxY4ZiYmJUV1cXWG9tbVVbW5ucTqckyel06tSpU+rs7Azsqa2tldVqVXZ29s2MAgAARpGQfsTzve99TwsWLFBGRoY6Ojr05JNPKioqSg899JBsNpuWLFmisrIyJSUlyWq1asWKFXI6ncrNzZUkzZs3T9nZ2SouLlZlZaXcbrfWrl0rl8sli8Vyq64RAACMMCEFyocffqiHHnpI//d//6fbb79d999/vxobG3X77bdLkrZs2aLIyEgVFhbK5/MpPz9f27ZtC9w/KipKNTU1WrZsmZxOpxISElRSUqINGzYM/ZUBAIARK6RA2bdv36eux8XFqaqqSlVVVdfdk5GRoZdffjmUhwUAAGMMn8UDAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMc1OBsnnzZkVERKi0tDRwrqenRy6XS8nJyRo/frwKCwvl8XiC7tfW1qaCggKNGzdOKSkpWrVqlS5fvnwzowAAgFHkhgOlublZP/nJTzRt2rSg8ytXrtTBgwe1f/9+1dfXq6OjQ4sWLQqs9/f3q6CgQL29vTp27Jj27t2rPXv2aP369Td3JQAAYNS4oUC5ePGiioqK9MILL+i2224LnO/u7tauXbv03HPPac6cOZoxY4Z2796tY8eOqbGxUZL02muv6cyZM/rpT3+q6dOna/78+dq4caOqqqrU29s7dFcGAABGrBsKFJfLpYKCAuXl5QWdb2lpUV9fX9D5rKwspaenq6GhQZLU0NCgnJwc2e32wJ78/Hx5vV6dPn36mo/n8/nk9XqDDgAAMHpFh3qHffv26cSJE2pubr5qze12KzY2VomJiUHn7Xa73G53YM8fx8ng+uDatVRUVOgHP/hBqKMCAIARKqRnUNrb2/XEE0/oxRdfVFxc3K2b6grl5eXq7u4OHO3t7cP22AAAYPiFFCgtLS3q7OzUPffco+joaEVHR6u+vl5bt25VdHS07Ha7ent71dXVFXQ/j8cjh8MhSXI4HFe9q2fw9uCeK1ksFlmt1qADAACMXiEFyty5c3Xq1CmdPHkycMycOVNFRUWBf46JiVFdXV3gPq2trWpra5PT6ZQkOZ1OnTp1Sp2dnYE9tbW1slqtys7OHsprAwAAI1RIr0GZMGGC7rzzzqBzCQkJSk5ODpxfsmSJysrKlJSUJKvVqhUrVsjpdCo3N1eSNG/ePGVnZ6u4uFiVlZVyu91au3atXC6XLBbLUF4bAAAYoUJ+kexn2bJliyIjI1VYWCifz6f8/Hxt27YtsB4VFaWamhotW7ZMTqdTCQkJKikp0YYNG4Z6lBFj8ppD1117f3PBsM4CAIAJbjpQ3njjjaDbcXFxqqqqUlVV1XXvk5GRoZdffvlmHxoAAIxSfBYPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAA
A4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOCEFyvbt2zVt2jRZrVZZrVY5nU698sorgfWenh65XC4lJydr/PjxKiwslMfjCfoabW1tKigo0Lhx45SSkqJVq1bp8uXLQ3dFAABgxAspUCZNmqTNmzerpaVFb731lubMmaOvf/3rOn36tCRp5cqVOnjwoPbv36/6+np1dHRo0aJFgfv39/eroKBAvb29OnbsmPbu3as9e/Zo/fr1Q39lAABgxIoOZfOCBQuCbj/zzDPavn27GhsbNWnSJO3atUvV1dWaM2eOJGn37t2aOnWqGhsblZubq9dee01nzpzRkSNHZLfbNX36dG3cuFGrV6/WU089pdjY2KG9OgAAMCLd8GtQ+vv7tW/fPl26dElOp1MtLS3q6+tTXl5eYE9WVpbS09PV0NAgSWpoaFBOTo7sdntgT35+vrxeb+BZmGvx+Xzyer1BBwAAGL1CDpRTp05p/Pjxslgs+s53vqMDBw4oOztbbrdbsbGxSkxMDNpvt9vldrslSW63OyhOBtcH166noqJCNpstcKSlpYU6NgAAGEFCDpQvfvGLOnnypJqamrRs2TKVlJTozJkzt2a63ysvL1d3d3fgaG9vv6WPBwAAwiuk16BIUmxsrP78z/9ckjRjxgw1NzfrRz/6kb75zW+qt7dXXV1dQc+ieDweORwOSZLD4dDx48eDvt7gu3wG91yLxWKRxWIJdVQAADBC3fTvQRkYGJDP59OMGTMUExOjurq6wFpra6va2trkdDolSU6nU6dOnVJnZ2dgT21traxWq7Kzs292FAAAMEqE9AxKeXm55s+fr/T0dF24cEHV1dV644039Oqrr8pms2nJkiUqKytTUlKSrFarVqxYIafTqdzcXEnSvHnzlJ2dreLiYlVWVsrtdmvt2rVyuVw8QwIAAAJCCpTOzk49/PDD+vjjj2Wz2TRt2jS9+uqr+pu/+RtJ0pYtWxQZGanCwkL5fD7l5+dr27ZtgftHRUWppqZGy5Ytk9PpVEJCgkpKSrRhw4ahvzIAADBihRQou3bt+tT1uLg4VVVVqaqq6rp7MjIy9PLLL4fysAAAYIzhs3gAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYJzrcA8Ask9ccuu7a+5sLhnUWAMDYxTMoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA44QUKBUVFbr33ns1YcIEpaSkaOHChWptbQ3a09PTI5fLpeTkZI0fP16FhYXyeDxBe9ra2lRQUKBx48YpJSVFq1at0uXLl4fmigAAwIgXUqDU19fL5XKpsbFRtbW16uvr07x583Tp0qXAnpUrV+rgwYPav3+/6uvr1dHRoUWLFgXW+/v7VVBQoN7eXh07dkx79+7Vnj17tH79+qG9MgAAMGJFh7L58OHDQbf37NmjlJQUtbS06K/+6q/U3d2tXbt2qbq6WnPmzJEk7d69W1OnTlVjY6Nyc3P12muv6cyZMzpy5IjsdrumT5+ujRs3avXq1XrqqacUGxs7tFcIAABGnJt6DUp3d7ckKSkpSZLU0tKivr4+5eXlBfZkZWUpPT1dDQ0NkqSGhgbl5OTIbrcH9uTn58vr9er06dM3Mw4AABglQnoG5Y8NDAyotLRUs2fP1p133ilJcrvdio2NVWJiYtBeu90ut9sd2PPHcTK4Prh2LT6fTz6fL3Db6/Xe6NgAAGAEuOFnUFwul95++23t27dvaCe6hoq
KCtlstsCRlpZ2yx8TAACEzw0FyvLly1VTU6OjR49q0qRJgfMOh0O9vb3q6uoK2u/xeORwOAJ7rnxXz+DtwT1XKi8vV3d3d+Bob2+/kbEBAMAIEVKg+P1+LV++XAcOHNDrr7+uzMzMoPUZM2YoJiZGdXV1gXOtra1qa2uT0+mUJDmdTp06dUqdnZ2BPbW1tbJarcrOzr7m41osFlmt1qADAACMXiG9BsXlcqm6ulo///nPNWHChMBrRmw2m+Lj42Wz2bRkyRKVlZUpKSlJVqtVK1askNPpVG5uriRp3rx5ys7OVnFxsSorK+V2u7V27Vq5XC5ZLJZbc5UAAGBECSlQtm/fLkn6yle+EnR+9+7d+va3vy1J2rJliyIjI1VYWCifz6f8/Hxt27YtsDcqKko1NTVatmyZnE6nEhISVFJSog0bNgzNFQEAgBEvpEDx+/2fuScuLk5VVVWqqqq67p6MjAy9/PLLoTw0AAAYQ/gsHgAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYJyQA+XNN9/UggULlJqaqoiICL300ktB636/X+vXr9fEiRMVHx+vvLw8nT17NmjP+fPnVVRUJKvVqsTERC1ZskQXL168+asBAACjQsiBcunSJd11112qqqq65nplZaW2bt2qHTt2qKmpSQkJCcrPz1dPT09gT1FRkU6fPq3a2lrV1NTozTff1NKlS2/uSgAAwKgRHeod5s+fr/nz519zze/36/nnn9fatWv19a9/XZL0b//2b7Lb7XrppZe0ePFivfPOOzp8+LCam5s1c+ZMSdKPf/xjfe1rX9M///M/KzU19WavCQAAjHBD+hqUc+fOye12Ky8vL3DOZrNp1qxZamhokCQ1NDQoMTExECeSlJeXp8jISDU1NV3z6/p8Pnm93qADAACMXkMaKG63W5Jkt9uDztvt9sCa2+1WSkpK0Hp0dLSSkpICe65UUVEhm80WONLS0oZybAAAYJgR8S6e8vJydXd3B4729vZwjwQAAG6hIQ0Uh8MhSfJ4PEHnPR5PYM3hcKizszNo/fLlyzp//nxgz5UsFousVmvQAQAARq8hDZTMzEw5HA7V1dUFznm9XjU1NcnpdEqSnE6nurq61NLSEtjz+uuva2BgQLNmzRrKcQAAwAgV8rt4Ll68qPfeey9w+9y5czp58qSSkpKUnp6u0tJSPf3005oyZYoyMzO1bt06paamauHChZKkqVOn6qtf/aoee+wx7dixQ319fVq+fLkWL17MO3jGoMlrDl137f3NBcM6CwDAHCEHyltvvaW//uu/DtwuKyuTJJWUlGjPnj36/ve/r0uXLmnp0qXq6urS/fffr8OHDysuLi5wnxdffFHLly/X3LlzFRkZqcLCQm3dunWorgkAAIxwIQfKV77yFfn9/uuuR0REaMOGDdqwYcN19yQlJam6ujrUhwYAAGPEiHgXDwAAGFsIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxosM9AGCKyWsOfer6+5sLhm0WABjreAYFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcfgsHiCMPu3zf/jsHwBjGc+gAAAA4xA
oAADAOGENlKqqKk2ePFlxcXGaNWuWjh8/Hs5xAACAIcIWKP/xH/+hsrIyPfnkkzpx4oTuuusu5efnq7OzM1wjAQAAQ4TtRbLPPfecHnvsMT3yyCOSpB07dujQoUP613/9V61ZsyZcYwFjFi/YBWCSsARKb2+vWlpaVF5eHjgXGRmpvLw8NTQ0XLXf5/PJ5/MFbnd3d0uSvF7vLZlvwPfJddduxWMO9+OZNMtIuXaN8esf7lnufPLV6669/YP8YZ0FGM2G+9/7wa/p9/s/e7M/DD766CO/JP+xY8eCzq9atcr/pS996ar9Tz75pF8SBwcHBwcHxyg42tvbP7MVRsTvQSkvL1dZWVng9sDAgM6fP6/k5GRFRESEdbah5vV6lZaWpvb2dlmt1nCPM+zG+vWL78GYv37xPeD6R/H1+/1+XbhwQampqZ+5NyyB8id/8ieKioqSx+MJOu/xeORwOK7ab7FYZLFYgs4lJibe8jnDyWq1jro/mKEY69cvvgdj/vrF94DrH6XXb7PZPte+sLyLJzY2VjNmzFBdXV3g3MDAgOrq6uR0OsMxEgAAMEjYfsRTVlamkpISzZw5U1/60pf0/PPP69KlS4F39QAAgLErbIHyzW9+U7/97W+1fv16ud1uTZ8+XYcPH5bdbg/XSEawWCx68sknr/qR1lgx1q9ffA/G/PWL7wHXP8avf1CE/3O91wcAAGD48Fk8AADAOAQKAAAwDoECAACMQ6AAAADjECgGqKio0L333qsJEyYoJSVFCxcuVGtra7jHCqvNmzcrIiJCpaWl4R5l2Hz00Uf61re+peTkZMXHxysnJ0dvvfVWuMcaNv39/Vq3bp0yMzMVHx+vP/uzP9PGjRs/32d2jEBvvvmmFixYoNTUVEVEROill14KWvf7/Vq/fr0mTpyo+Ph45eXl6ezZs2Gb91b4tO9BX1+fVq9erZycHCUkJCg1NVUPP/ywOjo6wjrzUPqsPwN/7Dvf+Y4iIiL0/PPPD+uM4USgGKC+vl4ul0uNjY2qra1VX1+f5s2bp0uXLoV7tLBobm7WT37yE02bNi3cowyb3/3ud5o9e7ZiYmL0yiuv6MyZM/rhD3+o2267LdyjDZtnn31W27dv17/8y7/onXfe0bPPPqvKykr9+Mc/Dvdot8SlS5d01113qaqq6prrlZWV2rp1q3bs2KGmpiYlJCQoPz9fPT09wz7rrfJp34NPPvlEJ06c0Lp163TixAn953/+p1pbW/V3f/d3YZn1VvisPwODDhw4oMbGxs/16+FHlaH8EEAMjc7OTr8kf319fbhHGXYXLlzwT5kyxV9bW+v/8pe/7H/iiSfCPdKwWL16tf/+++8P9xhhVVBQ4H/00UeDzi1atMhfVFQUtpmGiyT/gQMHArcHBgb8DofD/0//9E+Bc11dXX6LxeL/93//9zBNeWtd+T24luPHj/sl+T/44INhm2u4XO/6P/zwQ/+f/umf+t9++21/RkaGf8uWLWGZLxx4BsVA3d3dkqSkpKRwjzLsXC6XCgoKlJeXF+5RhtUvfvELzZw5Uw888IBSUlJ0991364UXXgj3WMPqvvvuU11dnd59911J0n//93/rV7/6lebPnx/u0YbduXPn5Ha7g/49sNlsmjVrlhoaGsI6Wzh1d3crIiJi1H8W26CBgQEVFxdr1apVuuOOO8I9zrAbEZ9mPJYMDAyotLRUs2fP1p133hnucYbVvn37dOLECTU3N4d7lGH3m9/8Rtu3b1dZWZn+4R/+Qc3NzXr88ccVGxurkpKScI83LNasWSOv16usrCxFRUWpv79fzzzzjIqKisI92rBzu92SdNVv1rbb7YG1saanp0erV6/WQw89NCo/QO9ann32WUVHR+vxxx8P9yhhQaAYxuVy6e2339avfvWrcI8yrNrb2/XEE0+otrZWcXFx4R5n2A0MDGjmzJnatGmTJOnuu+/W22+/rR07doyZQPnZz36mF198UdXV1brjjjt08uRJlZa
WKjU1dcx8D3BtfX19evDBB+X3+7V9+/ZwjzMsWlpa9KMf/UgnTpxQREREuMcJC37EY5Dly5erpqZGR48e1aRJk8I9zrBqaWlRZ2en7rnnHkVHRys6Olr19fXaunWroqOj1d/fH+4Rb6mJEycqOzs76NzUqVPV1tYWtpmG26pVq7RmzRotXrxYOTk5Ki4u1sqVK1VRURHu0Yadw+GQJHk8nqDzHo8nsDZWDMbJBx98oNra2jHz7Mkvf/lLdXZ2Kj09PfB34gcffKDvfve7mjx5crjHGxY8g2IAv9+vFStW6MCBA3rjjTeUmZkZ7pGG3dy5c3Xq1Kmgc4888oiysrK0evVqRUVFhW224TB79uyr3lr+7rvvKiMjI2wzDbdPPvlEkZHB/88UFRWlgYGBsM0ULpmZmXI4HKqrq9P06dMlSV6vV01NTVq2bFm4xxs2g3Fy9uxZHT16VMnJyeEeadgUFxdf9Vq8/Px8FRcX65FHHgnbXMOJQDGAy+VSdXW1fv7zn2vChAmBnzHbbDbFx8eHe7xhMWHChKtec5OQkKDk5OQx8VqclStX6r777tOmTZv04IMP6vjx49q5c6d27twZ7tGGzYIFC/TMM88oPT1dd9xxh/7rv/5Lzz33nB599NFwj3ZLXLx4Ue+9917g9rlz53Ty5EklJSUpPT1dpaWlevrppzVlyhRlZmZq3bp1Sk1N1cKFC8M691D6tO/BxIkT9Y1vfEMnTpxQTU2N+vv7A383JiUlKTY2NoyTD43P+jNwZZDFxMTI4XDoi1/8YhimDYNwv40I///tZdc6du/eHe7Rwmosvc3Y7/f7Dx486L/zzjv9FovFn5WV5d+5c2e4RxpWXq/X/8QTT/jT09P9cXFx/i984Qv+f/zHf/T7fL5wj3ZLHD169Jr/3peUlPj9v3+r8bp16/x2u91vsVj8c+fO9be2toZ77CH1ad+Dc+fOXffvxqNHj4Z79CHxWX8GrjTW3mYc4R+tv6YRAACMWLxIFgAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYJz/B0FPZLxO9+txAAAAAElFTkSuQmCC",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAGdCAYAAADwjmIIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkD0lEQVR4nO3dfVSUdf7/8Rc3DZgxQ94AspJabiJ5GxpOllvJcVRyY3N31VwXjerUATelvKE8aG2F2fGkHU3X2iPtObGpndUKEiVMWBPvMFaxdLP0YKuDlskIW6gwvz9+ca3zFSsUmfj4fJxznW3m+sw177n27PJsmLkI8Hq9XgEAABgm0N8DAAAAXAlEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjBft7AH9qaGjQ0aNHFRYWpoCAAH+PAwAAfgKv16vTp08rOjpagYEXf7/mqo6co0ePKiYmxt9jAACAS3DkyBF17dr1ovuv6sgJCwuTvj9Jdrvd3+MAAICfwOPxKCYmxvo5fjFXdeQ0/orKbrcTOQAAtDE/9lETPngMAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEjNipzs7GwNHjxYYWFhioiIUHJysg4cOOCz5q677lJAQIDP9uijj/qsqaysVFJSkq699lpFRERoxowZOnfunM+azZs369Zbb1VISIh69uypnJycC+ZZunSpunfvrtDQUCUkJGjHjh3Ne/UAAMBYzYqc4uJipaWladu2bSosLNTZs2c1YsQI1dbW+qx7+OGHdezYMWtbsGCBta++vl5JSUk6c+aMtm7dqjfeeEM5OTnKysqy1hw6dEhJSUm6++67VV5ermnTpumhhx7Shg0brDWrVq1SRkaG5s6dq927d6t///5yuVw6fvz45Z0RAABghACv1+u91AefOHFCERERKi4u1rBhw6Tv38kZMGCAFi1a1ORj1q9fr3vvvVdHjx5VZGSkJGn58uWaNWuWTpw4IZvNplmzZik/P18VFRXW48aPH69Tp06poKBAkpSQkKDBgwdryZIl0vd/bDMmJkZTp07V7Nmzf9L8Ho9HDodD1dXVXPEYAIA24qf+/L6sz+RUV1dLkjp06OBz/5tvvqlOnTqpT58+yszM1H//+19rX2lpqfr27WsFjiS5XC55PB7t27fPWpOYmOhzTJfLpdLSUknSmTNnVFZW5rMmMDBQiYmJ1pqm1NXVyePx+GwAAMBMl/y3qxoaGjRt2jQNHTpUffr0se5/4IEH1K1bN0VHR2vPnj2aNWuWDhw4oH/84x+SJLfb7RM4kqzbbrf7B9d4PB59++23+uabb1RfX9/kmv3791905uzsbD3zzDOX+pIBAEAbcsmRk5aWpoqKCm3ZssXn/kceecT65759+6pLly4aPny4Pv/8c910002XN+1lyszMVEZGhnW78a+YAgAA81xS5KSnpysvL08lJSXq2rXrD65NSEiQJB08eFA33XSToqKiLvgWVFVVlSQpKirK+s/G+85fY7fb1a5dOwUFBSkoKKjJNY3HaEpISIhCQkKa+WoBAEBb1KzI8Xq9mjp1qtauXavNmzerR48eP/qY8vJySVKXLl0kSU6nU88//7yOHz+uiIgISVJhYaHsdrvi4uKsNe+//77PcQoLC+V0OiVJNptN8fHxKioqUnJysvT9r8+KioqUnp7enJd0xXSfne/vEa4Kh+cn+XsEAMDPVLMiJy0tTbm5uXrnnXcUFhZmfYbG4XCoXbt2+vzzz5Wbm6vRo0erY8eO2rNnj6ZPn65hw4apX79+kqQRI0YoLi5OkyZN0oIFC+R2uzVnzhylpaVZ77I8+uijWrJkiWbOnKkHH3xQmzZt0urVq5Wf/79wyMjIUEpKigYNGqTbbrtNixYtUm1traZMmdKyZwgAALRJzYqcZcuWSd9/Tfx8K1eu1OTJk2Wz2fTBBx9YwRE
TE6OxY8dqzpw51tqgoCDl5eXpsccek9PpVPv27ZWSkqJnn33WWtOjRw/l5+dr+vTpWrx4sbp27arXX39dLpfLWjNu3DidOHFCWVlZcrvdGjBggAoKCi74MDIAALg6XdZ1ctq6K3mdHH5d1Tr4dRUAXH1a5To5AAAAP1dEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACM1K3Kys7M1ePBghYWFKSIiQsnJyTpw4IDPmu+++05paWnq2LGjrrvuOo0dO1ZVVVU+ayorK5WUlKRrr71WERERmjFjhs6dO+ezZvPmzbr11lsVEhKinj17Kicn54J5li5dqu7duys0NFQJCQnasWNH8149AAAwVrMip7i4WGlpadq2bZsKCwt19uxZjRgxQrW1tdaa6dOn67333tOaNWtUXFyso0eP6v7777f219fXKykpSWfOnNHWrVv1xhtvKCcnR1lZWdaaQ4cOKSkpSXfffbfKy8s1bdo0PfTQQ9qwYYO1ZtWqVcrIyNDcuXO1e/du9e/fXy6XS8ePH7/8swIAANq8AK/X673UB584cUIREREqLi7WsGHDVF1drc6dOys3N1e//e1vJUn79+9X7969VVpaqiFDhmj9+vW69957dfToUUVGRkqSli9frlmzZunEiROy2WyaNWuW8vPzVVFRYT3X+PHjderUKRUUFEiSEhISNHjwYC1ZskSS1NDQoJiYGE2dOlWzZ8/+SfN7PB45HA5VV1fLbrdf6mloUvfZ+S16PDTt8Pwkf48AAGhlP/Xn92V9Jqe6ulqS1KFDB0lSWVmZzp49q8TERGtNbGysbrjhBpWWlkqSSktL1bdvXytwJMnlcsnj8Wjfvn3WmvOP0bim8RhnzpxRWVmZz5rAwEAlJiZaa5pSV1cnj8fjswEAADNdcuQ0NDRo2rRpGjp0qPr06SNJcrvdstlsCg8P91kbGRkpt9ttrTk/cBr3N+77oTUej0fffvutvvrqK9XX1ze5pvEYTcnOzpbD4bC2mJiYS335AADgZ+6SIyctLU0VFRV66623WnaiKygzM1PV1dXWduTIEX+PBAAArpDgS3lQenq68vLyVFJSoq5du1r3R0VF6cyZMzp16pTPuzlVVVWKioqy1vzfb0E1fvvq/DX/9xtZVVVVstvtateunYKCghQUFNTkmsZjNCUkJEQhISGX8pIBAEAb06x3crxer9LT07V27Vpt2rRJPXr08NkfHx+va665RkVFRdZ9Bw4cUGVlpZxOpyTJ6XRq7969Pt+CKiwslN1uV1xcnLXm/GM0rmk8hs1mU3x8vM+ahoYGFRUVWWsAAMDVrVnv5KSlpSk3N1fvvPOOwsLCrM+/OBwOtWvXTg6HQ6mpqcrIyFCHDh1kt9s1depUOZ1ODRkyRJI0YsQIxcXFadKkSVqwYIHcbrfmzJmjtLQ0612WRx99VEuWLNHMmTP14IMPatOmTVq9erXy8//3jaWMjAylpKRo0KBBuu2227Ro0SLV1tZqypQpLXuGAABAm9SsyFm2bJkk6a677vK5f+XKlZo8ebIk6eWXX1ZgYKDGjh2ruro6uVwuvfrqq9baoKAg5eX
l6bHHHpPT6VT79u2VkpKiZ5991lrTo0cP5efna/r06Vq8eLG6du2q119/XS6Xy1ozbtw4nThxQllZWXK73RowYIAKCgou+DAyAAC4Ol3WdXLaOq6T0/ZxnRwAuPq0ynVyAAAAfq6IHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEZqduSUlJRozJgxio6OVkBAgNatW+ezf/LkyQoICPDZRo4c6bPm5MmTmjhxoux2u8LDw5WamqqamhqfNXv27NGdd96p0NBQxcTEaMGCBRfMsmbNGsXGxio0NFR9+/bV+++/39yXAwAADNXsyKmtrVX//v21dOnSi64ZOXKkjh07Zm1///vfffZPnDhR+/btU2FhofLy8lRSUqJHHnnE2u/xeDRixAh169ZNZWVleumllzRv3jytWLHCWrN161ZNmDBBqamp+vjjj5WcnKzk5GRVVFQ09yUBAAADBXi9Xu8lPzggQGvXrlVycrJ13+TJk3Xq1KkL3uFp9OmnnyouLk47d+7UoEGDJEkFBQUaPXq0vvzyS0VHR2vZsmV6+umn5Xa7ZbPZJEmzZ8/WunXrtH//fknSuHHjVFtbq7y8POvYQ4YM0YABA7R8+fKfNL/H45HD4VB1dbXsdvulnoYmdZ+d36LHQ9MOz0/y9wgAgFb2U39+X5HP5GzevFkRERHq1auXHnvsMX399dfWvtLSUoWHh1uBI0mJiYkKDAzU9u3brTXDhg2zAkeSXC6XDhw4oG+++cZak5iY6PO8LpdLpaWlF52rrq5OHo/HZwMAAGZq8cgZOXKk/va3v6moqEgvvviiiouLNWrUKNXX10uS3G63IiIifB4THBysDh06yO12W2siIyN91jTe/rE1jfubkp2dLYfDYW0xMTEt9KoBAMDPTXBLH3D8+PHWP/ft21f9+vXTTTfdpM2bN2v48OEt/XTNkpmZqYyMDOu2x+MhdAAAMNQV/wr5jTfeqE6dOungwYOSpKioKB0/ftxnzblz53Ty5ElFRUVZa6qqqnzWNN7+sTWN+5sSEhIiu93uswEAADNd8cj58ssv9fXXX6tLly6SJKfTqVOnTqmsrMxas2nTJjU0NCghIcFaU1JSorNnz1prCgsL1atXL11//fXWmqKiIp/nKiwslNPpvNIvCQAAtAHNjpyamhqVl5ervLxcknTo0CGVl5ersrJSNTU1mjFjhrZt26bDhw+rqKhI9913n3r27CmXyyVJ6t27t0aOHKmHH35YO3bs0EcffaT09HSNHz9e0dHRkqQHHnhANptNqamp2rdvn1atWqXFixf7/Krp8ccfV0FBgRYuXKj9+/dr3rx52rVrl9LT01vu7AAAgDar2ZGza9cuDRw4UAMHDpQkZWRkaODAgcrKylJQUJD27NmjX//617r55puVmpqq+Ph4/fOf/1RISIh1jDfffFOxsbEaPny4Ro8erTvuuMPnGjgOh0MbN27UoUOHFB8fryeeeEJZWVk+19K5/fbblZubqxUrVqh///56++23tW7dOvXp0+fyzwoAAGjzLus6OW0d18lp+7hODgBcffx6nRwAAAB/I3IAAICRiBwAAGAkIgc
AABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYqdmRU1JSojFjxig6OloBAQFat26dz36v16usrCx16dJF7dq1U2Jioj777DOfNSdPntTEiRNlt9sVHh6u1NRU1dTU+KzZs2eP7rzzToWGhiomJkYLFiy4YJY1a9YoNjZWoaGh6tu3r95///3mvhwAAGCoZkdObW2t+vfvr6VLlza5f8GCBXrllVe0fPlybd++Xe3bt5fL5dJ3331nrZk4caL27dunwsJC5eXlqaSkRI888oi13+PxaMSIEerWrZvKysr00ksvad68eVqxYoW1ZuvWrZowYYJSU1P18ccfKzk5WcnJyaqoqGj+WQAAAMYJ8Hq93kt+cECA1q5dq+TkZOn7d3Gio6P1xBNP6Mknn5QkVVdXKzIyUjk5ORo/frw+/fRTxcXFaefOnRo0aJAkqaCgQKNHj9aXX36p6OhoLVu2TE8//bTcbrdsNpskafbs2Vq3bp32798vSRo3bpxqa2uVl5dnzTNkyBANGDBAy5cv/0nzezweORwOVVdXy263X+ppaFL32fktejw07fD8JH+PAABoZT/153eLfibn0KFDcrvdSkxMtO5zOBxKSEhQaWmpJKm0tFTh4eFW4EhSYmKiAgMDtX37dmvNsGHDrMCRJJfLpQMHDuibb76x1pz/PI1rGp+nKXV1dfJ4PD4bAAAwU4tGjtvtliRFRkb63B8ZGWntc7vdioiI8NkfHBysDh06+Kxp6hjnP8fF1jTub0p2drYcDoe1xcTEXMarBQAAP2dX1berMjMzVV1dbW1Hjhzx90gAAOAKadHIiYqKkiRVVVX53F9VVWXti4qK0vHjx332nzt3TidPnvRZ09Qxzn+Oi61p3N+UkJAQ2e12nw0AAJipRSOnR48eioqKUlFRkXWfx+PR9u3b5XQ6JUlOp1OnTp1SWVmZtWbTpk1qaGhQQkKCtaakpERnz5611hQWFqpXr166/vrrrTXnP0/jmsbnAQAAV7dmR05NTY3Ky8tVXl4uff9h4/LyclVWViogIEDTpk3Tc889p3fffVd79+7VH//4R0VHR1vfwOrdu7dGjhyphx9+WDt27NBHH32k9PR0jR8/XtHR0ZKkBx54QDabTampqdq3b59WrVqlxYsXKyMjw5rj8ccfV0FBgRYuXKj9+/dr3rx52rVrl9LT01vu7AAAgDYruLkP2LVrl+6++27rdmN4pKSkKCcnRzNnzlRtba0eeeQRnTp1SnfccYcKCgoUGhpqPebNN99Uenq6hg8frsDAQI0dO1avvPKKtd/hcGjjxo1KS0tTfHy8OnXqpKysLJ9r6dx+++3Kzc3VnDlz9NRTT+mXv/yl1q1bpz59+lzO+QAAAIa4rOvktHVcJ6ft4zo5AHD18ct1cgAAAH4uiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgc
AABiJyAEAAEYicgAAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEYicgAAgJGIHAAAYKRgfw8AXI7us/P9PcJV4/D8JH+PAADNwjs5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBILR458+bNU0BAgM8WGxtr7f/uu++Ulpamjh076rrrrtPYsWNVVVXlc4zKykolJSXp2muvVUREhGbMmKFz5875rNm8ebNuvfVWhYSEqGfPnsrJyWnplwIAANqwK/JOzi233KJjx45Z25YtW6x906dP13vvvac1a9aouLhYR48e1f3332/tr6+vV1JSks6cOaOtW7fqjTfeUE5OjrKysqw1hw4dUlJSku6++26Vl5dr2rRpeuihh7Rhw4Yr8XIAAEAbFHxFDhocrKioqAvur66u1l//+lfl5ubqnnvukSStXLlSvXv31rZt2zRkyBBt3LhRn3zyiT744ANFRkZqwIAB+vOf/6xZs2Zp3rx5stlsWr58uXr06KGFCxdKknr37q0tW7bo5ZdflsvluhIvCQAAtDFX5J2czz77TNHR0brxxhs1ceJEVVZWSpLKysp09uxZJSYmWmtjY2N1ww03qLS0VJJUWlqqvn37KjIy0lrjcrnk8Xi0b98+a835x2hc03iMi6mrq5PH4/HZAACAmVo8chISEpSTk6OCggItW7ZMhw4d0p133qnTp0/L7XbLZrMpPDzc5zGRkZFyu92SJLfb7RM4jfsb9/3QGo/Ho2+//fais2VnZ8vhcFhbTExMi71uAADw89Liv64aNWqU9c/9+vVTQkKCunXrptWrV6tdu3Yt/XTNkpmZqYyMDOu2x+MhdAAAMNQV/wp5eHi4br75Zh08eFBRUVE6c+aMTp065bOmqqrK+gxPVFTUBd+2arz9Y2vsdvsPhlRISIjsdrvPBgAAzHTFI6empkaff/65unTpovj4eF1zzTUqKiqy9h84cECVlZVyOp2SJKfTqb179+r48ePWmsLCQtntdsXFxVlrzj9G45rGYwAAALR45Dz55JMqLi7W4cOHtXXrVv3mN79RUFCQJkyYIIfDodTUVGVkZOjDDz9UWVmZpkyZIqfTqSFDhkiSRowYobi4OE2aNEn/+te/tGHDBs2ZM0dpaWkKCQmRJD366KP64osvNHPmTO3fv1+vvvqqVq9erenTp7f0ywEAAG1Ui38m58svv9SECRP09ddfq3Pnzrrjjju0bds2de7cWZL08ssvKzAwUGPHjlVdXZ1cLpdeffVV6/FBQUHKy8vTY489JqfTqfbt2yslJUXPPvustaZHjx7Kz8/X9OnTtXjxYnXt2lWvv/46Xx8HAACWAK/X6/X3EP7i8XjkcDhUXV3d4p/P6T47v0WPB/jb4flJ/h4BAKRm/Pzmb1cBAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAACMROQAAAAjETkAAMBIRA4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASMH+HgBA29B9dr6/R7gqHJ6f5O8RAGPwTg4AADASkQMAAIxE5AAAACMROQAAwEhEDgAAMBKRAwAAjETkAAAAIxE5AADASEQOAAAwEpEDAAC
MROQAAAAjETkAAMBIRA4AADBSm4+cpUuXqnv37goNDVVCQoJ27Njh75EAAMDPQJuOnFWrVikjI0Nz587V7t271b9/f7lcLh0/ftzfowEAAD8L8Hq9Xn8PcakSEhI0ePBgLVmyRJLU0NCgmJgYTZ06VbNnz/7Rx3s8HjkcDlVXV8tut7fobN1n57fo8QAALefw/CR/j4DL8FN/fge36lQt6MyZMyorK1NmZqZ1X2BgoBITE1VaWtrkY+rq6lRXV2fdrq6ulr4/WS2toe6/LX5MAEDLuBL/v4/W0/jf34+9T9NmI+err75SfX29IiMjfe6PjIzU/v37m3xMdna2nnnmmQvuj4mJuWJzAgB+fhyL/D0BWsLp06flcDguur/NRs6lyMzMVEZGhnW7oaFBJ0+eVMeOHRUQENBiz+PxeBQTE6MjR460+K/B8D+c59bDuW4dnOfWwXluHVfyPHu9Xp0+fVrR0dE/uK7NRk6nTp0UFBSkqqoqn/urqqoUFRXV5GNCQkIUEhLic194ePgVm9Fut/M/oFbAeW49nOvWwXluHZzn1nGlzvMPvYPTqM1+u8pmsyk+Pl5FRUXWfQ0NDSoqKpLT6fTrbAAAwP/a7Ds5kpSRkaGUlBQNGjRIt912mxYtWqTa2lpNmTLF36MBAAA/a9ORM27cOJ04cUJZWVlyu90aMGCACgoKLvgwcmsLCQnR3LlzL/jVGFoW57n1cK5bB+e5dXCeW8fP4Ty36evkAAAAXEyb/UwOAADADyFyAACAkYgcAABgJCIHAAAYichpQSUlJRozZoyio6MVEBCgdevW+XskI2VnZ2vw4MEKCwtTRESEkpOTdeDAAX+PZZxly5apX79+1oW8nE6n1q9f7++xjDd//nwFBARo2rRp/h7FOPPmzVNAQIDPFhsb6++xjPSf//xHf/jDH9SxY0e1a9dOffv21a5du1p9DiKnBdXW1qp///5aunSpv0cxWnFxsdLS0rRt2zYVFhbq7NmzGjFihGpra/09mlG6du2q+fPnq6ysTLt27dI999yj++67T/v27fP3aMbauXOn/vKXv6hfv37+HsVYt9xyi44dO2ZtW7Zs8fdIxvnmm280dOhQXXPNNVq/fr0++eQTLVy4UNdff32rz9Kmr5PzczNq1CiNGjXK32MYr6CgwOd2Tk6OIiIiVFZWpmHDhvltLtOMGTPG5/bzzz+vZcuWadu2bbrlllv8NpepampqNHHiRL322mt67rnn/D2OsYKDgy/6p3/QMl588UXFxMRo5cqV1n09evTwyyy8k4M2r7q6WpLUoUMHf49irPr6er311luqra3lz6ZcIWlpaUpKSlJiYqK/RzHaZ599pujoaN14442aOHGiKisr/T2Scd59910NGjRIv/vd7xQREaGBAwfqtdde88ssvJODNq2hoUHTpk3T0KFD1adPH3+PY5y9e/fK6XTqu+++03XXXae1a9cqLi7O32MZ56233tLu3bu1c+dOf49itISEBOXk5KhXr146duyYnnnmGd15552qqKhQWFiYv8czxhdffKFly5YpIyNDTz31lHbu3Kk//elPstlsSklJadVZiBy0aWlpaaqoqOD36ldIr169VF5erurqar399ttKSUlRcXExodOCjhw5oscff1yFhYUKDQ319zhGO//jBP369VNCQoK6deum1atXKzU11a+zmaShoUGDBg3SCy+8IEkaOHCgKioqtHz58laPHH5dhTYrPT1deXl5+vDDD9W1a1d/j2Mkm82mnj17Kj4+XtnZ2erfv78WL17s77GMUlZWpuPHj+vWW29VcHCwgoODVVxcrFdeeUXBwcGqr6/394jGCg8P180336yDBw/6exSjdOnS5YJ/Eerdu7dffjXIOzloc7xer6ZOnaq1a9dq8+bNfvtA29WooaFBdXV1/h7DKMOHD9fevXt97psyZYpiY2M1a9YsBQUF+W0209XU1Ojzzz/XpEmT/D2KUYYOHXrBZT3+/e9/q1u
3bq0+C5HTgmpqanz+jeDQoUMqLy9Xhw4ddMMNN/h1NpOkpaUpNzdX77zzjsLCwuR2uyVJDodD7dq18/d4xsjMzNSoUaN0ww036PTp08rNzdXmzZu1YcMGf49mlLCwsAs+T9a+fXt17NiRz5m1sCeffFJjxoxRt27ddPToUc2dO1dBQUGaMGGCv0czyvTp03X77bfrhRde0O9//3vt2LFDK1as0IoVK1p/GC9azIcffuiVdMGWkpLi79GM0tQ5luRduXKlv0czyoMPPujt1q2b12azeTt37uwdPny4d+PGjf4e66rwq1/9yvv444/7ewzjjBs3ztulSxevzWbz/uIXv/COGzfOe/DgQX+PZaT33nvP26dPH29ISIg3NjbWu2LFCr/MEeD9/z80AAAAjMIHjwEAgJGIHAAAYCQiBwAAGInIAQAARiJyAACAkYgcAABgJCIHAAAYicgBAABGInIAAICRiBwAAGAkIgcAABiJyAEAAEb6f8M11LhmDDKeAAAAAElFTkSuQmCC",
       "text/plain": [
        "<Figure size 640x480 with 1 Axes>"
       ]
@@ -324,45 +290,31 @@
     }
    ],
    "source": [
-    "plt.hist(mkbatch(2048)[1].to(torch.float32).cpu(), bins=64)"
+    "plt.hist(mkbatch(2**15, large=False, target=\"onpath\")[1].cpu(), bins=5)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 13,
    "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(array([1516.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,\n",
-       "           0.,    0.,    0.,    0.,    0.,    0.,    0.,  420.,    0.,\n",
-       "           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,\n",
-       "           0.,    0.,    0.,    0.,    0.,   94.,    0.,    0.,    0.,\n",
-       "           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,\n",
-       "           0.,    0.,    0.,   16.,    0.,    0.,    0.,    0.,    0.,\n",
-       "           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,\n",
-       "           2.]),\n",
-       " array([1.    , 1.0625, 1.125 , 1.1875, 1.25  , 1.3125, 1.375 , 1.4375,\n",
-       "        1.5   , 1.5625, 1.625 , 1.6875, 1.75  , 1.8125, 1.875 , 1.9375,\n",
-       "        2.    , 2.0625, 2.125 , 2.1875, 2.25  , 2.3125, 2.375 , 2.4375,\n",
-       "        2.5   , 2.5625, 2.625 , 2.6875, 2.75  , 2.8125, 2.875 , 2.9375,\n",
-       "        3.    , 3.0625, 3.125 , 3.1875, 3.25  , 3.3125, 3.375 , 3.4375,\n",
-       "        3.5   , 3.5625, 3.625 , 3.6875, 3.75  , 3.8125, 3.875 , 3.9375,\n",
-       "        4.    , 4.0625, 4.125 , 4.1875, 4.25  , 4.3125, 4.375 , 4.4375,\n",
-       "        4.5   , 4.5625, 4.625 , 4.6875, 4.75  , 4.8125, 4.875 , 4.9375,\n",
-       "        5.    ]),\n",
-       " <BarContainer object of 64 artists>)"
+       "(array([2.0379e+04, 7.8380e+03, 3.0080e+03, 1.0690e+03, 3.4600e+02,\n",
+       "        9.8000e+01, 2.5000e+01, 5.0000e+00]),\n",
+       " array([1., 2., 3., 4., 5., 6., 7., 8., 9.]),\n",
+       " <BarContainer object of 8 artists>)"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     },
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAGdCAYAAAAMm0nCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAoW0lEQVR4nO3df1RU94H//9cgAsY4g2iZcTaotE1VjJpEEjLmd2VFpe56SjehYa3bstrNQhqjMcJpNZqkxZisRlMqsW2CZ2tOfuxZ3cYkGKpVWkMQUValhprUKKkZyB5kRshHRLnfP771noyCghmEN3k+zrnnZO59z9z3u++e8Mw4jA7LsiwBAAAYJKK3JwAAANBdBAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA40T29gR6Snt7u06cOKEhQ4bI4XD09nQAAEAXWJalU6dOyev1KiKi8/dZ+m3AnDhxQgkJCb09DQAAcAXq6up03XXXdXq93wbMkCFDpL/9D+B0Ont7OgAAoAuCwaASEhLsn+Od6bcBc/6PjZxOJwEDAIBhLvfxDz7ECwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA40T29gRMNDrvzU6vfbQy/arOBQCALyPegQEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxuh0wZWVlmjVrlrxerxwOh7Zs2dLp2H/7t3+Tw+HQc889F3K+sbFRWVlZcjqdio2NVXZ2tpqbm0PGHDhwQHfeeadiYmKUkJCgVatWdXeqAACgn+p2wLS0tGjSpEkqLCy85LjNmzfrvffek9frvehaVlaWampqVFpaqq1bt6qsrEzz58+3rweDQU2bNk2jRo1SVVWVnnnmGS1fvlwbNmzo7nQBAEA/FNndJ8yYMUMzZsy45Ji//vWveuihh7Rt2zalp6eHXDt8+LBKSkpUWVmp5ORkSdLzzz+vmTNn6tlnn5XX69WmTZt05swZvfjii4qKitL48eNVXV2t1atXh4QOAAD4cgr7Z2Da29s1Z84cLV68WOPHj7/oenl5uWJjY+14kaTU1FRFRESooqLCHnPXXXcpKirKHpOWlqba2lqdPHmyw/u2trYqGAyGHAAAoH8Ke8A8/fTTioyM1I9+9KMOr/v9fsXHx4eci4yMVFxcnPx+vz3G7XaHjDn/+PyYCxUUFMjlctlHQkJCmFYEAAD6mrAGTFVVldauXavi4mI5HI5wvvRl5efnKxAI2EddXd1VvT8AALh6whowf/jDH9TQ0KCRI0cqMjJSkZGROnbsmBYtWqTRo0dLkjwejxoaGkKed/bsWTU2Nsrj8dhj6uvrQ8acf3x+zIWio6PldDpDDgAA0D+FNWDmzJmjAwcOqLq62j68Xq8WL16sbdu2SZJ8Pp+amppUVVVlP2/Hjh1qb29XSkqKPaasrExtbW32mNLSUo0ZM0ZDhw4N55QBAICBuv1bSM3Nzfrggw/sx0ePHlV1dbXi4uI0cuRIDRs2LGT8wIED5fF4NGbMGEnSuHHjNH36dM2bN09FRUVqa2tTbm6uMjMz7V+5fuCBB7RixQplZ2dryZIlOnTokNauXas1a9Z88RUDAADjdTtg9u7dq3vvvdd+vHDhQknS3LlzVVxc3KXX2LRpk3JzczV16lRFREQoIyND69ats6+7XC698847ysnJ0eTJkzV8+HAtW7aMX6EGAACSJIdlWVZvT6InBINBuVwuBQKBsH8eZnTem51e+2hleqfXAADApXX15zd/FxIAADAOAQM
AAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjdDtgysrKNGvWLHm9XjkcDm3ZssW+1tbWpiVLlmjChAkaPHiwvF6vvve97+nEiRMhr9HY2KisrCw5nU7FxsYqOztbzc3NIWMOHDigO++8UzExMUpISNCqVau+yDoBAEA/0u2AaWlp0aRJk1RYWHjRtc8++0z79u3T0qVLtW/fPv33f/+3amtr9Q//8A8h47KyslRTU6PS0lJt3bpVZWVlmj9/vn09GAxq2rRpGjVqlKqqqvTMM89o+fLl2rBhw5WuEwAA9CMOy7KsK36yw6HNmzdr9uzZnY6prKzUrbfeqmPHjmnkyJE6fPiwkpKSVFlZqeTkZElSSUmJZs6cqY8//lher1fr16/Xj3/8Y/n9fkVFRUmS8vLytGXLFr3//vtdmlswGJTL5VIgEJDT6bzSJXZodN6bnV77aGV6WO8FAMCXSVd/fvf4Z2ACgYAcDodiY2MlSeXl5YqNjbXjRZJSU1MVERGhiooKe8xdd91lx4skpaWlqba2VidPnuzpKQMAgD4usidf/PTp01qyZIm++93v2hXl9/sVHx8fOonISMXFxcnv99tjEhMTQ8a43W772tChQy+6V2trq1pbW+3HwWCwR9YEAAB6X4+9A9PW1qb77rtPlmVp/fr1PXUbW0FBgVwul30kJCT0+D0BAEDv6JGAOR8vx44dU2lpacifYXk8HjU0NISMP3v2rBobG+XxeOwx9fX1IWPOPz4/5kL5+fkKBAL2UVdX1wMrAwAAfUHYA+Z8vBw5ckS/+93vNGzYsJDrPp9PTU1Nqqqqss/t2LFD7e3tSklJsceUlZWpra3NHlNaWqoxY8Z0+MdHkhQdHS2n0xlyAACA/qnbAdPc3Kzq6mpVV1dLko4eParq6modP35cbW1t+s53vqO9e/dq06ZNOnfunPx+v/x+v86cOSNJGjdunKZPn6558+Zpz5492r17t3Jzc5WZmSmv1ytJeuCBBxQVFaXs7GzV1NTo1Vdf1dq1a7Vw4cJwrx8AABio279GvXPnTt17770XnZ87d66WL19+0Ydvz/v973+ve+65R/rbF9nl5ubqjTfeUEREhDIyMrRu3Tpde+219vgDBw4oJydHlZWVGj58uB566CEtWbKky/Pk16gBADBPV39+f6HvgenLCBgAAMzTZ74HBgAAINwIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMbpdsCUlZVp1qxZ8nq9cjgc2rJlS8h1y7K0bNkyjRgxQoMGDVJqaqqOHDkSMqaxsVFZWVlyOp2KjY1Vdna2mpubQ8YcOHBAd955p2JiYpSQkKBVq1Zd6RoBAEA/0+2AaWlp0aRJk1RYWNjh9VWrVmndunUqKipSRUWFBg8erLS0NJ0+fdoek5WVpZqaGpWWlmrr1q0qKyvT/Pnz7evBYFDTpk3TqFGjVFVVpWeeeUbLly/Xhg0brnSdAACgH3FYlmVd8ZMdDm3evFmzZ8+W/vbui9fr1aJFi/Too49KkgKBgNxut4qLi5WZmanDhw8
rKSlJlZWVSk5OliSVlJRo5syZ+vjjj+X1erV+/Xr9+Mc/lt/vV1RUlCQpLy9PW7Zs0fvvv9+luQWDQblcLgUCATmdzitdYodG573Z6bWPVqaH9V4AAHyZdPXnd1g/A3P06FH5/X6lpqba51wul1JSUlReXi5JKi8vV2xsrB0vkpSamqqIiAhVVFTYY+666y47XiQpLS1NtbW1OnnyZIf3bm1tVTAYDDkAAED/FNaA8fv9kiS32x1y3u1229f8fr/i4+NDrkdGRiouLi5kTEev8fl7XKigoEAul8s+EhISwrgyAADQl/Sb30LKz89XIBCwj7q6ut6eEgAA6CFhDRiPxyNJqq+vDzlfX19vX/N4PGpoaAi5fvbsWTU2NoaM6eg1Pn+PC0VHR8vpdIYcAACgfwprwCQmJsrj8Wj79u32uWAwqIqKCvl8PkmSz+dTU1OTqqqq7DE7duxQe3u7UlJS7DFlZWVqa2uzx5SWlmrMmDEaOnRoOKcMAAAM1O2AaW5uVnV1taqrq6W/fXC3urpax48fl8Ph0IIFC/TUU0/pt7/9rQ4ePKjvfe978nq99m8qjRs3TtOnT9e8efO0Z88e7d69W7m5ucrMzJTX65UkPfDAA4qKilJ2drZqamr06quvau3atVq4cGG41w8AAAwU2d0n7N27V/fee6/9+HxUzJ07V8XFxXrsscfU0tKi+fPnq6mpSXfccYdKSkoUExNjP2fTpk3Kzc3V1KlTFRERoYyMDK1bt86+7nK59M477ygnJ0eTJ0/W8OHDtWzZspDvigEAAF9eX+h7YPoyvgcGAADz9Mr3wAAAAFwNBAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACME/aAOXfunJYuXarExEQNGjRIX/va1/Tkk0/Ksix7jGVZWrZsmUaMGKFBgwYpNTVVR44cCXmdxsZGZWVlyel0KjY2VtnZ2Wpubg73dAEAgIHCHjBPP/201q9fr5///Oc6fPiwnn76aa1atUrPP/+8PWbVqlVat26dioqKVFFRocGDBystLU2nT5+2x2RlZammpkalpaXaunWrysrKNH/+/HBPFwAAGMhhff6tkTD41re+JbfbrV//+tf2uYyMDA0aNEi/+c1vZFmWvF6vFi1apEcffVSSFAgE5Ha7VVxcrMzMTB0+fFhJSUmqrKxUcnKyJKmkpEQzZ87Uxx9/LK/Xe9l5BINBuVwuBQIBOZ3OcC5Ro/Pe7PTaRyvTw3ovAAC+TLr68zvs78BMmTJF27dv15///GdJ0v/+7//qj3/8o2bMmCFJOnr0qPx+v1JTU+3nuFwupaSkqLy8XJJUXl6u2NhYO14kKTU1VREREaqoqAj3lAEAgGEiw/2CeXl5CgaDGjt2rAYMGKBz587ppz/9qbKysiRJfr9fkuR2u0Oe53a77Wt+v1/x8fGhE42MVFxcnD3mQq2trWptbbUfB4PBcC8NAAD0EWF/B+a1117Tpk2b9PLLL2vfvn3auHGjnn32WW3cuDHctwpRUFAgl8tlHwkJCT16PwAA0HvCHjCLFy9WXl6eMjMzNWHCBM2ZM0ePPPKICgoKJEkej0eSVF9fH/K8+vp6+5rH41FDQ0PI9bNnz6qxsdEec6H8/HwFAgH7qKurC/fSAABAHxH2gPnss88UERH6sgMGDFB7e7skKTExUR6PR9u3b7evB4NBVVRUyOfzSZJ8Pp+amppUVVVlj9mxY4fa29uVkpLS4X2jo6PldDpDDgAA0D+F/TMws2bN0k9/+lONHDlS48eP1/7
9+7V69Wr94Ac/kCQ5HA4tWLBATz31lK6//nolJiZq6dKl8nq9mj17tiRp3Lhxmj59uubNm6eioiK1tbUpNzdXmZmZXfoNJAAA0L+FPWCef/55LV26VP/+7/+uhoYGeb1e/fCHP9SyZcvsMY899phaWlo0f/58NTU16Y477lBJSYliYmLsMZs2bVJubq6mTp2qiIgIZWRkaN26deGeLgAAMFDYvwemr+B7YAAAME+vfQ8MAABATyNgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGKdHAuavf/2r/vmf/1nDhg3ToEGDNGHCBO3du9e+blmWli1bphEjRmjQoEFKTU3VkSNHQl6jsbFRWVlZcjqdio2NVXZ2tpqbm3tiugAAwDBhD5iTJ0/q9ttv18CBA/X222/rT3/6k/7jP/5DQ4cOtcesWrVK69atU1FRkSoqKjR48GClpaXp9OnT9pisrCzV1NSotLRUW7duVVlZmebPnx/u6QIAAAM5LMuywvmCeXl52r17t/7whz90eN2yLHm9Xi1atEiPPvqoJCkQCMjtdqu4uFiZmZk6fPiwkpKSVFlZqeTkZElSSUmJZs6cqY8//lher/ey8wgGg3K5XAoEAnI6neFcokbnvdnptY9Wpof1XgAAfJl09ed32N+B+e1vf6vk5GT90z/9k+Lj43XTTTfpl7/8pX396NGj8vv9Sk1Ntc+5XC6lpKSovLxcklReXq7Y2Fg7XiQpNTVVERERqqio6PC+ra2tCgaDIQcAAOifwh4wf/nLX7R+/Xpdf/312rZtmx588EH96Ec/0saNGyVJfr9fkuR2u0Oe53a77Wt+v1/x8fEh1yMjIxUXF2ePuVBBQYFcLpd9JCQkhHtpAACgjwh7wLS3t+vmm2/Wz372M910002aP3++5s2bp6KionDfKkR+fr4CgYB91NXV9ej9AABA7wl7wIwYMUJJSUkh58aNG6fjx49LkjwejySpvr4+ZEx9fb19zePxqKGhIeT62bNn1djYaI+5UHR0tJxOZ8gBAAD6p7AHzO23367a2tqQc3/+8581atQoSVJiYqI8Ho+2b99uXw8Gg6qoqJDP55Mk+Xw+NTU1qaqqyh6zY8cOtbe3KyUlJdxTBgAAhokM9ws+8sgjmjJlin72s5/pvvvu0549e7RhwwZt2LBBkuRwOLRgwQI99dRTuv7665WYmKilS5fK6/Vq9uzZ0t/esZk+fbr9R09tbW3Kzc1VZmZml34DCQAA9G9hD5hbbrlFmzdvVn5+vp544gklJibqueeeU1ZWlj3mscceU0tLi+bPn6+mpibdcccdKikpUUxMjD1m06ZNys3N1dSpUxUREaGMjAytW7cu3NMFAAAGCvv3wPQVfA8MAADm6bXvgQEAAOhpBAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACM0+MBs3LlSjkcDi1YsMA+d/r0aeXk5GjYsGG69tprlZGRofr6+pDnHT9+XOnp6brmmmsUHx+vxYsX6+zZsz09XXxJjM57s9MDAND39WjAVFZW6oUXXtD
EiRNDzj/yyCN644039Prrr2vXrl06ceKEvv3tb9vXz507p/T0dJ05c0bvvvuuNm7cqOLiYi1btqwnpwsAAAzRYwHT3NysrKws/fKXv9TQoUPt84FAQL/+9a+1evVqffOb39TkyZP10ksv6d1339V7770nSXrnnXf0pz/9Sb/5zW904403asaMGXryySdVWFioM2fO9NSUAQCAIXosYHJycpSenq7U1NSQ81VVVWpraws5P3bsWI0cOVLl5eWSpPLyck2YMEFut9sek5aWpmAwqJqamg7v19raqmAwGHIAAID+KbInXvSVV17Rvn37VFlZedE1v9+vqKgoxcbGhpx3u93y+/32mM/Hy/nr5691pKCgQCtWrAjjKgAAQF8V9ndg6urq9PDDD2vTpk2KiYkJ98t3Kj8/X4FAwD7q6uqu2r0BAMDVFfaAqaqqUkNDg26++WZFRkYqMjJSu3bt0rp16xQZGSm3260zZ86oqakp5Hn19fXyeDySJI/Hc9FvJZ1/fH7MhaKjo+V0OkMOAADQP4U9YKZOnaqDBw+qurraPpKTk5WVlWX/88CBA7V9+3b7ObW1tTp+/Lh8Pp8kyefz6eDBg2poaLDHlJaWyul0KikpKdxTBgAAhgn7Z2CGDBmiG264IeTc4MGDNWzYMPt8dna2Fi5cqLi4ODmdTj300EPy+Xy67bbbJEnTpk1TUlKS5syZo1WrVsnv9+snP/mJcnJyFB0dHe4pAwAAw/TIh3gvZ82aNYqIiFBGRoZaW1uVlpamX/ziF/b1AQMGaOvWrXrwwQfl8/k0ePBgzZ07V0888URvTBcAAPQxVyVgdu7cGfI4JiZGhYWFKiws7PQ5o0aN0ltvvXUVZgcAAEzD34UEAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACMQ8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwTtgDpqCgQLfccouGDBmi+Ph4zZ49W7W1tSFjTp8+rZycHA0bNkzXXnutMjIyVF9fHzLm+PHjSk9P1zXXXKP4+HgtXrxYZ8+eDfd0AQCAgcIeMLt27VJOTo7ee+89lZaWqq2tTdOmTVNLS4s95pFHHtEbb7yh119/Xbt27dKJEyf07W9/275+7tw5paen68yZM3r33Xe1ceNGFRcXa9myZeGeLgAAMJDDsiyrJ2/w6aefKj4+Xrt27dJdd92lQCCgr3zlK3r55Zf1ne98R5L0/vvva9y4cSovL9dtt92mt99+W9/61rd04sQJud1uSVJRUZGWLFmiTz/9VFFRUZe9bzAYlMvlUiAQkNPpDOuaRue92em1j1amh/Ve6BnsIQD0TV39+d3jn4EJBAKSpLi4OElSVVWV2tralJqaao8ZO3asRo4cqfLycklSeXm5JkyYYMeLJKWlpSkYDKqmpqbD+7S2tioYDIYcAACgf+rRgGlvb9eCBQt0++2364YbbpAk+f1+RUVFKTY2NmSs2+2W3++3x3w+Xs5fP3+tIwUFBXK5XPaRkJDQQ6sCAAC9rUcDJicnR4cOHdIrr7zSk7eRJOXn5ysQCNhHXV1dj98TAAD0jsieeuHc3Fxt3bpVZWVluu666+zzHo9HZ86cUVNTU8i7MPX19fJ4PPaYPXv2hLze+d9SOj/mQtHR0YqOju6h1QAAgL4k7O/AWJal3Nxcbd68WTt27FBiYmLI9cmTJ2vgwIHavn27fa62tlbHjx+Xz+eTJPl8Ph08eFANDQ32mNLSUjmdTiUlJYV7ygAAwDBhfwcmJydHL7/8sv7nf/5HQ4YMsT+z4nK5NGjQILl
cLmVnZ2vhwoWKi4uT0+nUQw89JJ/Pp9tuu02SNG3aNCUlJWnOnDlatWqV/H6/fvKTnygnJ4d3WQAAQPgDZv369ZKke+65J+T8Sy+9pH/5l3+RJK1Zs0YRERHKyMhQa2ur0tLS9Itf/MIeO2DAAG3dulUPPvigfD6fBg8erLlz5+qJJ54I93QBAICBwh4wXflamZiYGBUWFqqwsLDTMaNGjdJbb70V5tkBAID+gL8LCQAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYJ7K3JwAAV2J03pudXvtoZfpVnQuAq493YAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcQgYAABgHAIGAAAYh4ABAADGIWAAAIBxCBgAAGAcAgYAABiHgAEAAMYhYAAAgHEIGAAAYBwCBgAAGIeAAQAAxiFgAACAcfp0wBQWFmr06NGKiYlRSkqK9uzZ09tTAgAAfUBkb0+gM6+++qoWLlyooqIipaSk6LnnnlNaWppqa2sVHx/f29MDAHxBo/Pe7PTaRyvTr+pcYJ4++w7M6tWrNW/ePH3/+99XUlKSioqKdM011+jFF1/s7akBAIBe1iffgTlz5oyqqqqUn59vn4uIiFBqaqrKy8s7fE5ra6taW1vtx4FAQJIUDAbDPr/21s86vdYT90P4sYfmYw/Nxx6iI+f33rKsS47rkwHzf//3fzp37pzcbnfIebfbrffff7/D5xQUFGjFihUXnU9ISOixeXbE9dxVvR16AHtoPvbQfOwhTp06JZfL1en1PhkwVyI/P18LFy60H7e3t6uxsVHDhg2Tw+EI232CwaASEhJUV1cnp9MZttftS/r7Glmf+fr7Gvv7+vQlWCPru3KWZenUqVPyer2XHNcnA2b48OEaMGCA6uvrQ87X19fL4/F0+Jzo6GhFR0eHnIuNje2xOTqdzn75f8rP6+9rZH3m6+9r7O/r05dgjazvylzqnZfz+uSHeKOiojR58mRt377dPtfe3q7t27fL5/P16twAAEDv65PvwEjSwoULNXfuXCUnJ+vWW2/Vc889p5aWFn3/+9/v7akBAIBe1mcD5v7779enn36qZcuWye/368Ybb1RJSclFH+y92qKjo/X4449f9MdV/Ul/XyPrM19/X2N/X5++BGtkfT3PYV3u95QAAAD6mD75GRgAAIBLIWAAAIBxCBgAAGAcAgYAABiHgLlAWVmZZs2aJa/XK4fDoS1btlz2OTt37tTNN9+s6Ohoff3rX1dxcfFVmeuV6O76du7cKYfDcdHh9/uv2py7o6CgQLfccouGDBmi+Ph4zZ49W7W1tZd93uuvv66xY8cqJiZGEyZM0FtvvXVV5nslrmSNxcXFF+1hTEzMVZtzd6xfv14TJ060vyDL5/Pp7bffvuRzTNq/7q7PpL3ryMqVK+VwOLRgwYJLjjNpDy/UlTWatI/Lly+/aK5jx4695HN6Y/8ImAu0tLRo0qRJKiws7NL4o0ePKj09Xffee6+qq6u1YMEC/eu//qu2bdvW43O9Et1d33m1tbX65JNP7CM+Pr7H5vhF7Nq1Szk5OXrvvfdUWlqqtrY2TZs2TS0tLZ0+591339V3v/tdZWdna//+/Zo9e7Zmz56tQ4cOXdW5d9WVrFF/+8bMz+/hsWPHrtqcu+O6667TypUrVVVVpb179+qb3/ym/vEf/1E1NTUdjjdt/7q7Phm0dxeqrKzUCy+8oIkTJ15ynGl7+HldXaMM28fx48eHzPWPf/xjp2N7bf8sdEqStXnz5kuOeeyxx6zx48eHnLv//vuttLS0Hp7dF9eV9f3+97+3JFknT568avMKp4aGBkuStWvXrk7H3HfffVZ6enrIuZSUFOuHP/zhVZjhF9eVNb700kuWy+W6qvMKp6FDh1q/+tWvOrxm+v5Zl1m
fqXt36tQp6/rrr7dKS0utu+++23r44Yc7HWvqHnZnjSbt4+OPP25NmjSpy+N7a/94B+YLKi8vV2pqasi5tLQ0lZeX99qcesKNN96oESNG6O///u+1e/fu3p5OlwUCAUlSXFxcp2NM38OurFGSmpubNWrUKCUkJFz2v/j7inPnzumVV15RS0tLp3+NiMn715X1ydC9y8nJUXp6+kV70xFT97A7a5Rh+3jkyBF5vV599atfVVZWlo4fP97p2N7avz77Tbym8Pv9F307sNvtVjAY1P/7f/9PgwYN6rW5hcOIESNUVFSk5ORktba26le/+pXuueceVVRU6Oabb+7t6V1Se3u7FixYoNtvv1033HBDp+M628O++jmfz+vqGseMGaMXX3xREydOVCAQ0LPPPqspU6aopqZG11133VWdc1ccPHhQPp9Pp0+f1rXXXqvNmzcrKSmpw7Em7l931mfa3knSK6+8on379qmysrJL403cw+6u0aR9TElJUXFxscaMGaNPPvlEK1as0J133qlDhw5pyJAhF43vrf0jYHBJY8aM0ZgxY+zHU6ZM0Ycffqg1a9boP//zP3t1bpeTk5OjQ4cOXfLPbk3X1TX6fL6Q/8KfMmWKxo0bpxdeeEFPPvnkVZhp94wZM0bV1dUKBAL6r//6L82dO1e7du3q9Ie8abqzPtP2rq6uTg8//LBKS0v77IdUv6grWaNJ+zhjxgz7nydOnKiUlBSNGjVKr732mrKzs3t1bp9HwHxBHo9H9fX1Iefq6+vldDqNf/elM7feemufj4Lc3Fxt3bpVZWVll/2vm8720OPx9PAsv5jurPFCAwcO1E033aQPPvigx+b3RURFRenrX/+6JGny5MmqrKzU2rVr9cILL1w01sT96876LtTX966qqkoNDQ0h79CeO3dOZWVl+vnPf67W1lYNGDAg5Dmm7eGVrPFCfX0fPy82Nlbf+MY3Op1rb+0fn4H5gnw+n7Zv3x5yrrS09JJ/nm266upqjRgxoren0SHLspSbm6vNmzdrx44dSkxMvOxzTNvDK1njhc6dO6eDBw/22X28UHt7u1pbWzu8Ztr+deRS67tQX9+7qVOn6uDBg6qurraP5ORkZWVlqbq6usMf7Kbt4ZWs8UJ9fR8/r7m5WR9++GGnc+21/evRjwgb6NSpU9b+/fut/fv3W5Ks1atXW/v377eOHTtmWZZl5eXlWXPmzLHH/+Uvf7GuueYaa/Hixdbhw4etwsJCa8CAAVZJSUkvrqJz3V3fmjVrrC1btlhHjhyxDh48aD388MNWRESE9bvf/a4XV9G5Bx980HK5XNbOnTutTz75xD4+++wze8ycOXOsvLw8+/Hu3butyMhI69lnn7UOHz5sPf7449bAgQOtgwcP9tIqLu1K1rhixQpr27Zt1ocffmhVVVVZmZmZVkxMjFVTU9NLq+hcXl6etWvXLuvo0aPWgQMHrLy8PMvhcFjvvPOOZfWD/evu+kzau85c+Bs6pu9hRy63RpP2cdGiRdbOnTuto0ePWrt377ZSU1Ot4cOHWw0NDZbVh/aPgLnA+V8bvvCYO3euZVmWNXfuXOvuu+++6Dk33nijFRUVZX31q1+1XnrppV6a/eV1d31PP/209bWvfc2KiYmx4uLirHvuucfasWNHL67g0jpam6SQPbn77rvt9Z732muvWd/4xjesqKgoa/z48dabb77ZC7PvmitZ44IFC6yRI0daUVFRltvttmbOnGnt27evl1ZwaT/4wQ+sUaNGWVFRUdZXvvIVa+rUqfYPd6sf7F9312fS3nXmwh/upu9hRy63RpP28f7777dGjBhhRUVFWX/3d39n3X///dYHH3xgX+8r++ew/v9/IQIAABiDz8AAAADjEDAAAMA4BAwAADAOAQMAAIxDwAAAAOMQMAAAwDgEDAAAMA4BAwAAjEPAAAAA4xAwAADAOAQMAAAwDgEDAACM8/8BbhuvIZklPe0AAAAASUVORK5CYII=",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAGdCAYAAADwjmIIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAwsElEQVR4nO3de3BUZZ7/8U8T7AQwHa65DSEEcAiXAHIxtBeUJZOAGcasjCKioEQZ3KBAFCHKYICdCQMFyBS3ZVXCrCDIlkQFBUIQGIaoEIwQHDKCMNGFDs4IaYgaSHJ+/2zOz15ADXZs8vh+VZ2qnOf59unv01r0p06fPu2wLMsSAACAYZoEugEAAICGQMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABipaaAbCKTa2lqdPHlSoaGhcjgcgW4HAAB8D5Zl6dy5c4qOjlaTJlc+X/OTDjknT55UTExMoNsAAABX4dNPP1X79u2vOP+TDjmhoaHS/75ILpcr0O0AAIDvwev1KiYmxn4fv5KfdMip+4jK5XIRcgAAaGS+61ITLjwGAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMFLTQDdgqo7TNwe6hQZ3Ym5qoFsAAOCKOJMDAACMRMgBAABGqlfIycnJ0YABAxQaGqrw8HClpaWptLTUp+brr79WRkaG2rRpo+uvv14jRoxQeXm5T01ZWZlSU1PVvHlzhYeHa+rUqaqurvap2blzp/r27avg4GB16dJFubm5l/SzdOlSdezYUSEhIUpMTNT7779fv9UDAABj1Svk7Nq1SxkZGXr33XeVn5+vixcvKjk5WZWVlXbNlClT9Oabb2rDhg3atWuXTp48qbvvvtuer6mpUWpqqi5cuKC9e/dq9erVys3N1cyZM+2a48ePKzU1VYMHD1ZxcbEmT56sRx55RFu3brVr1q9fr8zMTD333HM6cOCAevfurZSUFJ0+ffqHvyoAAKDRc1iWZV3tgz///HOFh4dr165dGjRokCoqKtSuXTutXbtWv/71ryVJR44cUbdu3VRYWKiBAwfq7bff1i9/+UudPHlSERERkqQVK1Zo2rRp+vzzz+V0OjVt2jRt3rxZJSUl9nPdd999Onv2rLZs2SJJSkxM1IABA7RkyRJJUm1trWJiYvT4449r+vTp36t/r9ersLAwVVRUyOVyXe3LcFlceAwAQMP4vu/fP+ianIqKCklS69atJUlFRUW6ePGikpKS7Jr4+Hh16NBBhYWFkqTCwkIlJCTYAUeSUlJS5PV6dfjwYbvmm8eoq6k7xoULF1RUVORT06RJEyUlJdk1l1NVVSWv1+uzAQAAM111yKmtrdXkyZN1yy23qGfPnpIkj8cjp9Opli1b+tRGRETI4/HYNd8MOHXzdXPfVuP1evXVV1/pH//4h2pqai5bU3eMy8nJyVFYWJi9xcTEXO3yAQDANe6qQ05GRoZKSkq0bt06/3bUgLKyslRRUWFvn376aaBbAgAADeSqbgY4ceJEbdq0Sbt371b79u3t8cjISF24cEFnz571OZtTXl6uyMhIu+b/fguq7ttX36z5v9/IKi8vl8vlUrNmzRQUFKSgoKDL1tQd43KCg4MVHBx8NUsGAACNTL3O5FiWpYkTJ2rjxo3asWOH4uLifOb79eun6667TgUFBfZYaWmpysrK5Ha7JUlut1uHDh3y+RZUfn6+XC6Xunfvbtd88xh1NXXHcDqd6tevn09NbW2tCgoK7BoAAPDTVq8zORkZGVq7dq1ef/11hYaG2te/hIWFqVmzZgoLC1N6eroyMzPVunVruVwuPf7443K73Ro4cKAkKTk5Wd27d9eDDz6oefPmyePxaMaMGcrIyLDPskyYMEFLlizR008/rXHjxmnHjh169dV
XtXnz///GUmZmpsaOHav+/fvrpptu0vPPP6/Kyko9/PDD/n2FAABAo1SvkLN8+XJJ0h133OEzvmrVKj300EOSpEWLFqlJkyYaMWKEqqqqlJKSomXLltm1QUFB2rRpkx577DG53W61aNFCY8eO1ezZs+2auLg4bd68WVOmTNHixYvVvn17vfDCC0pJSbFrRo4cqc8//1wzZ86Ux+NRnz59tGXLlksuRgYAAD9NP+g+OY0d98n5YbhPDgAgEH6U++QAAABcqwg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAj1Tvk7N69W8OHD1d0dLQcDofy8vJ85h0Ox2W3+fPn2zUdO3a8ZH7u3Lk+xzl48KBuu+02hYSEKCYmRvPmzbuklw0bNig+Pl4hISFKSEjQW2+9Vd/lAAAAQ9U75FRWVqp3795aunTpZedPnTrls7300ktyOBwaMWKET93s2bN96h5//HF7zuv1Kjk5WbGxsSoqKtL8+fOVnZ2tlStX2jV79+7VqFGjlJ6erg8++EBpaWlKS0tTSUlJfZcEAAAM1LS+Dxg2bJiGDRt2xfnIyEif/ddff12DBw9Wp06dfMZDQ0Mvqa2zZs0aXbhwQS+99JKcTqd69Oih4uJiLVy4UOPHj5ckLV68WEOHDtXUqVMlSXPmzFF+fr6WLFmiFStW1HdZAADAMA16TU55ebk2b96s9PT0S+bmzp2rNm3a6MYbb9T8+fNVXV1tzxUWFmrQoEFyOp32WEpKikpLS3XmzBm7JikpyeeYKSkpKiwsvGI/VVVV8nq9PhsAADBTvc/k1Mfq1asVGhqqu+++22f8iSeeUN++fdW6dWvt3btXWVlZOnXqlBYuXChJ8ng8iouL83lMRESEPdeqVSt5PB577Js1Ho/niv3k5ORo1qxZflwhAAC4VjVoyHnppZc0evRohYSE+IxnZmbaf/fq1UtOp1O/+c1vlJOTo+Dg4AbrJysry+e5vV6vYmJiGuz5AABA4DRYyPnzn/+s0tJSrV+//jtrExMTVV1drRMnTqhr166KjIxUeXm5T03dft11PFequdJ1PpIUHBzcoCEKAABcOxrsmpwXX3xR/fr1U+/evb+ztri4WE2aNFF4eLgkye12a/fu3bp48aJdk5+fr65du6pVq1Z2TUFBgc9x8vPz5Xa7/b4WAADQ+NQ75Jw/f17FxcUqLi6WJB0/flzFxcUqKyuza7xerzZs2KBHHnnkkscXFhbq+eef14cffqhPPvlEa9as0ZQpU/TAAw/YAeb++++X0+lUenq6Dh8+rPXr12vx4sU+HzVNmjRJW7Zs0YIFC3TkyBFlZ2dr//79mjhx4tW+FgAAwCD1/rhq//79Gjx4sL1fFzzGjh2r3NxcSdK6detkWZZGjRp1yeODg4O1bt06ZWdnq6qqSnFxcZoyZYpPgAkLC9O2bduUkZGhfv36qW3btpo5c6b99XFJuvnmm7V27VrNmDFDzzzzjG644Qbl5eWpZ8+e9X8VAACAcRyWZVmBbiJQvF6vwsLCVFFRIZfL5ddjd5y+2a/HuxadmJsa6BYAAD9B3/f9m9+uAgAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAAB
gJEIOAAAwEiEHAAAYiZADAACMVO+Qs3v3bg0fPlzR0dFyOBzKy8vzmX/ooYfkcDh8tqFDh/rUfPHFFxo9erRcLpdatmyp9PR0nT9/3qfm4MGDuu222xQSEqKYmBjNmzfvkl42bNig+Ph4hYSEKCEhQW+99VZ9lwMAAAxV75BTWVmp3r17a+nSpVesGTp0qE6dOmVvr7zyis/86NGjdfjwYeXn52vTpk3avXu3xo8fb897vV4lJycrNjZWRUVFmj9/vrKzs7Vy5Uq7Zu/evRo1apTS09P1wQcfKC0tTWlpaSopKanvkgAAgIEclmVZV/1gh0MbN25UWlqaPfbQQw/p7Nmzl5zhqfPXv/5V3bt31759+9S/f39J0pYtW3TnnXfqs88+U3R0tJYvX65nn31WHo9HTqdTkjR9+nTl5eXpyJEjkqSRI0eqsrJSmzZtso89cOBA9enTRytWrPhe/Xu9XoWFhamiokIul+tqX4bL6jh9s1+Pdy06MTc10C0AAH6Cvu/7d4Nck7Nz506Fh4era9eueuyxx/TPf/7TnissLFTLli3tgCNJSUlJatKkid577z27ZtCgQXbAkaSUlBSVlpbqzJkzdk1SUpLP86akpKiwsPCKfVVVVcnr9fpsAADATH4POUOHDtWf/vQnFRQU6A9/+IN27dqlYcOGqaamRpLk8XgUHh7u85imTZuqdevW8ng8dk1ERIRPTd3+d9XUzV9OTk6OwsLC7C0mJsZPqwYAANeapv4+4H333Wf/nZCQoF69eqlz587auXOnhgwZ4u+nq5esrCxlZmba+16vl6ADAIChGvwr5J06dVLbtm119OhRSVJkZKROnz7tU1NdXa0vvvhCkZGRdk15eblPTd3+d9XUzV9OcHCwXC6XzwYAAMzU4CHns88+0z//+U9FRUVJktxut86ePauioiK7ZseOHaqtrVViYqJds3v3bl28eNGuyc/PV9euXdWqVSu7pqCgwOe58vPz5Xa7G3pJAACgEah3yDl//ryKi4tVXFwsSTp+/LiKi4tVVlam8+fPa+rUqXr33Xd14sQJFRQU6K677lKXLl2UkpIiSerWrZuGDh2qRx99VO+//77+8pe/aOLEibrvvvsUHR0tSbr//vvldDqVnp6uw4cPa/369Vq8eLHPR02TJk3Sli1btGDBAh05ckTZ2dnav3+/Jk6c6L9XBwAANFr1Djn79+/XjTfeqBtvvFGSlJmZqRtvvFEzZ85UUFCQDh48qF/96lf6+c9/rvT0dPXr109//vOfFRwcbB9jzZo1io+P15AhQ3TnnXfq1ltv9bkHTlhYmLZt26bjx4+rX79+evLJJzVz5kyfe+ncfPPNWrt2rVauXKnevXvrv//7v5WXl6eePXv+8FcFAAA0ej/oPjmNHffJ+WG4Tw4AIBACep8cAACAQCPkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMVO+Qs3v3bg0fPlzR0dFyOBzKy8uz5y5evKhp06YpISFBLVq0UHR0tMaMGaOTJ0/6HKNjx45yOBw+29y5c31qDh48qNtuu00hISGKiYnRvHnzLullw4YNio+PV0hIiBISEvTWW2/VdzkAAMBQ9Q45lZWV6t27t5YuXXrJ3JdffqkDBw7ot7/9rQ4cOKDXXntNpaWl+tWvfnVJ7ezZs3Xq1Cl7e/zxx+05r9er5ORkxcbGqqioSPPnz1d2drZWrlxp1+zdu1ejRo1Senq6PvjgA6WlpSktLU0lJSX1XRIAADBQ0/o+YNiwYRo2bNhl58LCwpSfn+8ztmT
JEt10000qKytThw4d7PHQ0FBFRkZe9jhr1qzRhQsX9NJLL8npdKpHjx4qLi7WwoULNX78eEnS4sWLNXToUE2dOlWSNGfOHOXn52vJkiVasWJFfZcFAAAM0+DX5FRUVMjhcKhly5Y+43PnzlWbNm104403av78+aqurrbnCgsLNWjQIDmdTnssJSVFpaWlOnPmjF2TlJTkc8yUlBQVFhZesZeqqip5vV6fDQAAmKneZ3Lq4+uvv9a0adM0atQouVwue/yJJ55Q37591bp1a+3du1dZWVk6deqUFi5cKEnyeDyKi4vzOVZERIQ916pVK3k8HnvsmzUej+eK/eTk5GjWrFl+XiUAALgWNVjIuXjxou69915ZlqXly5f7zGVmZtp/9+rVS06nU7/5zW+Uk5Oj4ODghmpJWVlZPs/t9XoVExPTYM8HAAACp0FCTl3A+fvf/64dO3b4nMW5nMTERFVXV+vEiRPq2rWrIiMjVV5e7lNTt193Hc+Vaq50nY8kBQcHN2iIAgAA1w6/X5NTF3A+/vhjbd++XW3atPnOxxQXF6tJkyYKDw+XJLndbu3evVsXL160a/Lz89W1a1e1atXKrikoKPA5Tn5+vtxut7+XBAAAGqF6n8k5f/68jh49au8fP35cxcXFat26taKiovTrX/9aBw4c0KZNm1RTU2NfI9O6dWs5nU4VFhbqvffe0+DBgxUaGqrCwkJNmTJFDzzwgB1g7r//fs2aNUvp6emaNm2aSkpKtHjxYi1atMh+3kmTJun222/XggULlJqaqnXr1mn//v0+XzMHAAA/XQ7Lsqz6PGDnzp0aPHjwJeNjx45Vdnb2JRcM13nnnXd0xx136MCBA/q3f/s3HTlyRFVVVYqLi9ODDz6ozMxMn4+SDh48qIyMDO3bt09t27bV448/rmnTpvkcc8OGDZoxY4ZOnDihG264QfPmzdOdd975vdfi9XoVFhamioqK7/xIrb46Tt/s1+Ndi07MTQ10CwCAn6Dv+/5d75BjEkLOD0PIAQAEwvd9/+a3qwAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAI9U75OzevVvDhw9XdHS0HA6H8vLyfOYty9LMmTMVFRWlZs2aKSkpSR9//LFPzRdffKHRo0fL5XKpZcuWSk9P1/nz531qDh48qNtuu00hISGKiYnRvHnzLullw4YNio+PV0hIiBISEvTWW2/VdzkAAMBQ9Q45lZWV6t27t5YuXXrZ+Xnz5umPf/yjVqxYoffee08tWrRQSkqKvv76a7tm9OjROnz4sPLz87Vp0ybt3r1b48ePt+e9Xq+Sk5MVGxuroqIizZ8/X9nZ2Vq5cqVds3fvXo0aNUrp6en64IMPlJaWprS0NJWUlNT/VQAAAMZxWJZlXfWDHQ5t3LhRaWlp0v+exYmOjtaTTz6pp556SpJUUVGhiIgI5ebm6r777tNf//pXde/eXfv27VP//v0lSVu2bNGdd96pzz77TNHR0Vq+fLmeffZZeTweOZ1OSdL06dOVl5enI0eOSJJGjhypyspKbdq0ye5n4MCB6tOnj1asWPG9+vd6vQoLC1NFRYVcLtfVvgyX1XH6Zr8e71p0Ym5qoFsAAPwEfd/3b79ek3P8+HF5PB4lJSXZY2FhYUpMTFRhYaEkqbCwUC1btrQDjiQlJSWpSZMmeu+99+yaQYMG2QFHklJSUlRaWqozZ87YNd98nrqauucBAAA/bU39eTCPxyNJioiI8BmPiIiw5zwej8LDw32baNpUrVu39qmJi4u75Bh1c61atZL
H4/nW57mcqqoqVVVV2fter/cqVwoAAK51P6lvV+Xk5CgsLMzeYmJiAt0SAABoIH4NOZGRkZKk8vJyn/Hy8nJ7LjIyUqdPn/aZr66u1hdffOFTc7ljfPM5rlRTN385WVlZqqiosLdPP/30B6wWAABcy/wacuLi4hQZGamCggJ7zOv16r333pPb7ZYkud1unT17VkVFRXbNjh07VFtbq8TERLtm9+7dunjxol2Tn5+vrl27qlWrVnbNN5+nrqbueS4nODhYLpfLZwMAAGaqd8g5f/68iouLVVxcLP3vxcbFxcUqKyuTw+HQ5MmT9e///u964403dOjQIY0ZM0bR0dH2N7C6deumoUOH6tFHH9X777+vv/zlL5o4caLuu+8+RUdHS5Luv/9+OZ1Opaen6/Dhw1q/fr0WL16szMxMu49JkyZpy5YtWrBggY4cOaLs7Gzt379fEydO9N+rAwAAGq16X3i8f/9+DR482N6vCx5jx45Vbm6unn76aVVWVmr8+PE6e/asbr31Vm3ZskUhISH2Y9asWaOJEydqyJAhatKkiUaMGKE//vGP9nxYWJi2bdumjIwM9evXT23bttXMmTN97qVz8803a+3atZoxY4aeeeYZ3XDDDcrLy1PPnj1/yOsBAAAM8YPuk9PYcZ+cH4b75AAAAiEg98kBAAC4VhByAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACM1DTQDaDx6jh9c6BbaFAn5qYGugUAwA/AmRwAAGAkv4ecjh07yuFwXLJlZGRIku64445L5iZMmOBzjLKyMqWmpqp58+YKDw/X1KlTVV1d7VOzc+dO9e3bV8HBwerSpYtyc3P9vRQAANCI+f3jqn379qmmpsbeLykp0S9+8Qvdc8899tijjz6q2bNn2/vNmze3/66pqVFqaqoiIyO1d+9enTp1SmPGjNF1112n3//+95Kk48ePKzU1VRMmTNCaNWtUUFCgRx55RFFRUUpJSfH3kgAAQCPk95DTrl07n/25c+eqc+fOuv322+2x5s2bKzIy8rKP37Ztmz766CNt375dERER6tOnj+bMmaNp06YpOztbTqdTK1asUFxcnBYsWCBJ6tatm/bs2aNFixYRcgAAgNTQ1+RcuHBBL7/8ssaNGyeHw2GPr1mzRm3btlXPnj2VlZWlL7/80p4rLCxUQkKCIiIi7LGUlBR5vV4dPnzYrklKSvJ5rpSUFBUWFn5rP1VVVfJ6vT4bAAAwU4N+uyovL09nz57VQw89ZI/df//9io2NVXR0tA4ePKhp06aptLRUr732miTJ4/H4BBxJ9r7H4/nWGq/Xq6+++krNmjW7bD85OTmaNWuW39cJAACuPQ0acl588UUNGzZM0dHR9tj48ePtvxMSEhQVFaUhQ4bo2LFj6ty5c0O2o6ysLGVmZtr7Xq9XMTExDfqcAAAgMBos5Pz973/X9u3b7TM0V5KYmChJOnr0qDp37qzIyEi9//77PjXl5eWSZF/HExkZaY99s8blcl3xLI4kBQcHKzg4+KrXBAAAGo8GuyZn1apVCg8PV2rqt99Qrbi4WJIUFRUlSXK73Tp06JBOnz5t1+Tn58vlcql79+52TUFBgc9x8vPz5Xa7G2AlAACgMWqQkFNbW6tVq1Zp7Nixatr0/58sOnbsmObMmaOioiKdOHFCb7zxhsaMGaNBgwapV69ekqTk5GR1795dDz74oD788ENt3bpVM2bMUEZGhn0WZsKECfrkk0/09NNP68iRI1q2bJleffVVTZkypSGWAwAAGqEGCTnbt29XWVmZxo0b5zPudDq
1fft2JScnKz4+Xk8++aRGjBihN998064JCgrSpk2bFBQUJLfbrQceeEBjxozxua9OXFycNm/erPz8fPXu3VsLFizQCy+8wNfHAQCAzWFZlhXoJgLF6/UqLCxMFRUVcrlcfj226b/r9FPAb1cBwLXp+75/89tVAADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICR/B5ysrOz5XA4fLb4+Hh7/uuvv1ZGRobatGmj66+/XiNGjFB5ebnPMcrKypSamqrmzZsrPDxcU6dOVXV1tU/Nzp071bdvXwUHB6tLly7Kzc3191IAAEAj1iBncnr06KFTp07Z2549e+y5KVOm6M0339SGDRu0a9cunTx5Unfffbc9X1NTo9TUVF24cEF79+7V6tWrlZubq5kzZ9o1x48fV2pqqgYPHqzi4mJNnjxZjzzyiLZu3doQywEAAI1Q0wY5aNOmioyMvGS8oqJCL774otauXat/+Zd/kSStWrVK3bp107vvvquBAwdq27Zt+uijj7R9+3ZFRESoT58+mjNnjqZNm6bs7Gw5nU6tWLFCcXFxWrBggSSpW7du2rNnjxYtWqSUlJSGWBIAAGhkGuRMzscff6zo6Gh16tRJo0ePVllZmSSpqKhIFy9eVFJSkl0bHx+vDh06qLCwUJJUWFiohIQERURE2DUpKSnyer06fPiwXfPNY9TV1B3jSqqqquT1en02AABgJr+HnMTEROXm5mrLli1avny5jh8/rttuu03nzp2Tx+OR0+lUy5YtfR4TEREhj8cjSfJ4PD4Bp26+bu7barxer7766qsr9paTk6OwsDB7i4mJ8du6AQDAtcXvH1cNGzbM/rtXr15KTExUbGysXn31VTVr1szfT1cvWVlZyszMtPe9Xi9BBwAAQzX4V8hbtmypn//85zp69KgiIyN14cIFnT171qemvLzcvoYnMjLykm9b1e1/V43L5frWIBUcHCyXy+WzAQAAMzV4yDl//ryOHTumqKgo9evXT9ddd50KCgrs+dLSUpWVlcntdkuS3G63Dh06pNOnT9s1+fn5crlc6t69u13zzWPU1dQdAwAAwO8h56mnntKuXbt04sQJ7d27V//6r/+qoKAgjRo1SmFhYUpPT1dmZqbeeecdFRUV6eGHH5bb7dbAgQMlScnJyerevbsefPBBffjhh9q6datmzJihjIwMBQcHS5ImTJigTz75RE8//bSOHDmiZcuW6dVXX9WUKVP8vRwAANBI+f2anM8++0yjRo3SP//5T7Vr10633nqr3n33XbVr106StGjRIjVp0kQjRoxQVVWVUlJStGzZMvvxQUFB2rRpkx577DG53W61aNFCY8eO1ezZs+2auLg4bd68WVOmTNHixYvVvn17vfDCC3x9HAAA2ByWZVmBbiJQvF6vwsLCVFFR4ffrczpO3+zX4+HHd2JuaqBbAABcxvd9/+a3qwAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYKSmgW4AuFZ1nL450C00qBNzUwPdAgA0KM7kAAAAIxFyAACAkQg5AADASIQcAABgJL+HnJycHA0YMEChoaEKDw9XWlqaSktLfWruuOMOORwOn23ChAk+NWVlZUpNTVXz5s0
VHh6uqVOnqrq62qdm586d6tu3r4KDg9WlSxfl5ub6ezkAAKCR8nvI2bVrlzIyMvTuu+8qPz9fFy9eVHJysiorK33qHn30UZ06dcre5s2bZ8/V1NQoNTVVFy5c0N69e7V69Wrl5uZq5syZds3x48eVmpqqwYMHq7i4WJMnT9YjjzyirVu3+ntJAACgEfL7V8i3bNnis5+bm6vw8HAVFRVp0KBB9njz5s0VGRl52WNs27ZNH330kbZv366IiAj16dNHc+bM0bRp05SdnS2n06kVK1YoLi5OCxYskCR169ZNe/bs0aJFi5SSkuLvZQEAgEamwa/JqaiokCS1bt3aZ3zNmjVq27atevbsqaysLH355Zf2XGFhoRISEhQREWGPpaSkyOv16vDhw3ZNUlKSzzFTUlJUWFh4xV6qqqrk9Xp9NgAAYKYGvRlgbW2tJk+erFtuuUU9e/a0x++//37FxsYqOjpaBw8e1LRp01RaWqrXXntNkuTxeHwCjiR73+PxfGuN1+vVV199pWbNml3ST05OjmbNmtUgawUAANeWBg05GRkZKikp0Z49e3zGx48fb/+dkJCgqKgoDRkyRMeOHVPnzp0brJ+srCxlZmba+16vVzExMQ32fAAAIHAa7OOqiRMnatOmTXrnnXfUvn37b61NTEyUJB09elSSFBkZqfLycp+auv2663iuVONyuS57FkeSgoOD5XK5fDYAAGAmv4ccy7I0ceJEbdy4UTt27FBcXNx3Pqa4uFiSFBUVJUlyu906dOiQTp8+bdfk5+fL5XKpe/fudk1BQYHPcfLz8+V2u/28IgAA0Bj5PeRkZGTo5Zdf1tq1axUaGiqPxyOPx6OvvvpKknTs2DHNmTNHRUVFOnHihN544w2NGTNGgwYNUq9evSRJycnJ6t69ux588EF9+OGH2rp1q2bMmKGMjAwFBwdLkiZMmKBPPvlETz/9tI4cOaJly5bp1Vdf1ZQpU/y9JAAA0Aj5PeQsX75cFRUVuuOOOxQVFWVv69evlyQ5nU5t375dycnJio+P15NPPqkRI0bozTfftI8RFBSkTZs2KSgoSG63Ww888IDGjBmj2bNn2zVxcXHavHmz8vPz1bt3by1YsEAvvPACXx8HAACSJIdlWVagmwgUr9ersLAwVVRU+P36nI7TN/v1eIC/nZibGugWAOCqfN/3b367CgAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIzUNNANAAiMjtM3B7qFBndibmqgWwAQQJzJAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICR+O0qAMYy/fe5+G0u4NtxJgcAABip0YecpUuXqmPHjgoJCVFiYqLef//9QLcEAACuAY065Kxfv16ZmZl67rnndODAAfXu3VspKSk6ffp0oFsDAAAB1qhDzsKFC/Xoo4/q4YcfVvfu3bVixQo1b95cL730UqBbAwAAAdZoLzy+cOGCioqKlJWVZY81adJESUlJKiwsvOxjqqqqVFVVZe9XVFRIkrxer9/7q6360u/HBIBv6jBlQ6BbaFAls1IC3QKuUXXv25ZlfWtdow05//jHP1RTU6OIiAif8YiICB05cuSyj8nJydGsWbMuGY+JiWmwPgEAVyfs+UB3gGvduXPnFBYWdsX5RhtyrkZWVpYyMzPt/draWn3xxRdq06aNHA6H357H6/UqJiZGn376qVwul9+Oey0xfY2sr/EzfY2sr/EzfY0NuT7LsnTu3DlFR0d/a12jDTlt27ZVUFCQysvLfcbLy8sVGRl52ccEBwcrODjYZ6xly5YN1qPL5TLyf9xvMn2NrK/xM32NrK/xM32NDbW+bzuDU6fRXnjsdDrVr18
/FRQU2GO1tbUqKCiQ2+0OaG8AACDwGu2ZHEnKzMzU2LFj1b9/f9100016/vnnVVlZqYcffjjQrQEAgABr1CFn5MiR+vzzzzVz5kx5PB716dNHW7ZsueRi5B9bcHCwnnvuuUs+GjOJ6WtkfY2f6WtkfY2f6Wu8FtbnsL7r+1cAAACNUKO9JgcAAODbEHIAAICRCDkAAMBIhBwAAGAkQo4f7d69W8OHD1d0dLQcDofy8vIC3ZJf5eTkaMCAAQoNDVV4eLjS0tJUWloa6Lb8avny5erVq5d98yq3262333470G01mLlz58rhcGjy5MmBbsUvsrOz5XA4fLb4+PhAt+V3//M//6MHHnhAbdq0UbNmzZSQkKD9+/cHui2/6Nix4yX/DR0OhzIyMgLdml/U1NTot7/9reLi4tSsWTN17txZc+bM+c7fYGpMzp07p8mTJys2NlbNmjXTzTffrH379gWkl0b9FfJrTWVlpXr37q1x48bp7rvvDnQ7frdr1y5lZGRowIABqq6u1jPPPKPk5GR99NFHatGiRaDb84v27dtr7ty5uuGGG2RZllavXq277rpLH3zwgXr06BHo9vxq3759+o//+A/16tUr0K34VY8ePbR9+3Z7v2lTs/6ZO3PmjG655RYNHjxYb7/9ttq1a6ePP/5YrVq1CnRrfrFv3z7V1NTY+yUlJfrFL36he+65J6B9+csf/vAHLV++XKtXr1aPHj20f/9+PfzwwwoLC9MTTzwR6Pb84pFHHlFJSYn+67/+S9HR0Xr55ZeVlJSkjz76SD/72c9+3GYsNAhJ1saNGwPdRoM6ffq0JcnatWtXoFtpUK1atbJeeOGFQLfhV+fOnbNuuOEGKz8/37r99tutSZMmBbolv3juuees3r17B7qNBjVt2jTr1ltvDXQbP5pJkyZZnTt3tmprawPdil+kpqZa48aN8xm7++67rdGjRwesJ3/68ssvraCgIGvTpk0+43379rWeffbZH70fPq7CVauoqJAktW7dOtCtNIiamhqtW7dOlZWVxv1USEZGhlJTU5WUlBToVvzu448/VnR0tDp16qTRo0errKws0C351RtvvKH+/fvrnnvuUXh4uG688Ub953/+Z6DbahAXLlzQyy+/rHHjxvn1R5QD6eabb1ZBQYH+9re/SZI+/PBD7dmzR8OGDQt0a35RXV2tmpoahYSE+Iw3a9ZMe/bs+dH7Mes8Ln40tbW1mjx5sm655Rb17Nkz0O341aFDh+R2u/X111/r+uuv18aNG9W9e/dAt+U369at04EDBwL2GXlDSkxMVG5urrp27apTp05p1qxZuu2221RSUqLQ0NBAt+cXn3zyiZYvX67MzEw988wz2rdvn5544gk5nU6NHTs20O35VV5ens6ePauHHnoo0K34zfTp0+X1ehUfH6+goCDV1NTod7/7nUaPHh3o1vwiNDRUbrdbc+bMUbdu3RQREaFXXnlFhYWF6tKly4/f0I9+7ugnwvSPqyZMmGDFxsZan376aaBb8buqqirr448/tvbv329Nnz7datu2rXX48OFAt+UXZWVlVnh4uPXhhx/aYyZ9XPV/nTlzxnK5XEZ93HjddddZbrfbZ+zxxx+3Bg4cGLCeGkpycrL1y1/+MtBt+NUrr7xitW/f3nrllVesgwcPWn/605+s1q1bW7m5uYFuzW+OHj1qDRo0yJJkBQUFWQMGDLBGjx5txcfH/+i9EHIaiMkhJyMjw2rfvr31ySefBLqVH8WQIUOs8ePHB7oNv9i4caP9D0/dJslyOBxWUFCQVV1dHegW/a5///7W9OnTA92G33To0MFKT0/3GVu2bJkVHR0dsJ4awokTJ6wmTZpYeXl5gW7Fr9q3b28tWbLEZ2zOnDlW165dA9ZTQzl//rx18uRJy7Is695777XuvPPOH70HrsnB92ZZliZOnKiNGzdqx44diouLC3RLP4ra2lpVVVUFug2/GDJkiA4dOqTi4mJ769+/v0aPHq3i4mI
FBQUFukW/On/+vI4dO6aoqKhAt+I3t9xyyyW3bvjb3/6m2NjYgPXUEFatWqXw8HClpqYGuhW/+vLLL9Wkie9bb1BQkGprawPWU0Np0aKFoqKidObMGW3dulV33XXXj94D1+T40fnz53X06FF7//jx4youLlbr1q3VoUOHgPbmDxkZGVq7dq1ef/11hYaGyuPxSJLCwsLUrFmzQLfnF1lZWRo2bJg6dOigc+fOae3atdq5c6e2bt0a6Nb8IjQ09JJrqFq0aKE2bdoYcW3VU089peHDhys2NlYnT57Uc889p6CgII0aNSrQrfnNlClTdPPNN+v3v/+97r33Xr3//vtauXKlVq5cGejW/Ka2tlarVq3S2LFjjbsFwPDhw/W73/1OHTp0UI8ePfTBBx9o4cKFGjduXKBb85utW7fKsix17dpVR48e1dSpUxUfH6+HH374x2/mRz93ZLB33nnHknTJNnbs2EC35heXW5ska9WqVYFuzW/GjRtnxcbGWk6n02rXrp01ZMgQa9u2bYFuq0GZdE3OyJEjraioKMvpdFo/+9nPrJEjR1pHjx4NdFt+9+abb1o9e/a0goODrfj4eGvlypWBbsmvtm7dakmySktLA92K33m9XmvSpElWhw4drJCQEKtTp07Ws88+a1VVVQW6Nb9Zv3691alTJ8vpdFqRkZFWRkaGdfbs2YD04rBMus0iAADA/+KaHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACM9P8AMCmCWHBic9EAAAAASUVORK5CYII=",
       "text/plain": [
        "<Figure size 640x480 with 1 Axes>"
       ]
@@ -372,7 +324,7 @@
     }
    ],
    "source": [
-    "plt.hist(mktunebatch(2048)[1].to(torch.float32).cpu(), bins=64)"
+    "plt.hist(mkbatch(2**15, large=True, target=\"onpath\", largetarget=False)[1].cpu(), bins=8)"
    ]
   },
   {
@@ -386,7 +338,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 14,
    "execution_state": "idle",
    "metadata": {
     "id": "tLOWhg_CeWzH"
@@ -423,7 +375,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 16,
    "execution_state": "idle",
    "metadata": {
     "colab": {
@@ -437,7 +389,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Training data: 1048576K\n",
+      "Training data: 104857K\n",
       "Trainable parameters in the model: 550433\n"
      ]
     }
@@ -446,7 +398,7 @@
     "# PARAMS\n",
     "VOCAB_SIZE = 1 + MAX_VTXS + 1 # pad plus max number of vertices plus target token\n",
     "MODEL_DIM = 64 # Dimension of model (embedding and transformer)\n",
-    "NEPOCHS = 1000\n",
+    "NEPOCHS = 100\n",
     "BSZ = 2**15 # Batch size\n",
     "NHEADS = 2\n",
     "NLAYERS = 11\n",
@@ -474,7 +426,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 20,
    "execution_state": "idle",
    "metadata": {},
    "outputs": [],
@@ -486,7 +438,7 @@
     "# model = torch.compile(model)\n",
     "# model.load_state_dict(torch.load('model.pth', weights_only=True))\n",
     "\n",
-    "LR = 3e-4\n",
+    "LR = 5e-4\n",
     "\n",
     "criterion = nn.MSELoss()\n",
     "optimizer = torch.optim.Adam(model.parameters(), lr=LR)"
@@ -518,21 +470,17 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/sipb/.venv/lib64/python3.12/site-packages/torch/nn/functional.py:6278: UserWarning: Memory Efficient attention on Navi31 GPU is still experimental. Enable it with TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1. (Triggered internally at ../aten/src/ATen/native/transformers/hip/sdp_utils.cpp:269.)\n",
-      "  attn_output = scaled_dot_product_attention(\n",
-      "/home/sipb/.venv/lib64/python3.12/site-packages/torch/_inductor/compile_fx.py:167: UserWarning: TensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.\n",
-      "  warnings.warn(\n",
       "/tmp/torchinductor_sipb/lc/clcqc3ufbzrethiy77dmsu54kurxdmh4eji2f3msm347rhmfpf4j.py:1078: UserWarning: Attempting to use hipBLASLt on an unsupported architecture! Overriding blas backend to hipblas (Triggered internally at ../aten/src/ATen/Context.cpp:296.)\n",
       "  extern_kernels.mm(reinterpret_tensor(buf1, (524288, 64), (64, 1), 0), reinterpret_tensor(primals_5, (64, 192), (1, 64), 0), out=buf2)\n",
       "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:53<00:00,  1.68s/it]"
+      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:42<00:00,  1.33s/it]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 0/1000 \t Train Err: 44.25287628173828 15.68527889251709 8.788555145263672 3.8603785037994385 100.71075439453125\n"
+      "Epoch 0/100 \t Train Err: 48.97900426387787 12.91722442075843 7.231296321027912 3.5385852727340534 115.3402452468872\n"
      ]
     },
     {
@@ -547,7 +495,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 1/1000 \t Train Err: 42.749107360839844 19.990596771240234 12.096658706665039 6.1431379318237305 90.65858459472656\n"
+      "Epoch 1/100 \t Train Err: 41.59842586517334 23.066684544086456 14.473573058843613 7.877466633915901 84.72042870521545\n"
      ]
     },
     {
@@ -562,7 +510,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 2/1000 \t Train Err: 35.17692565917969 0.05676012113690376 15.442191123962402 8.584199905395508 82.2193374633789\n"
+      "Epoch 2/100 \t Train Err: 40.415191769599915 29.482473254203796 19.633903205394745 11.77738669514656 73.48623991012573\n"
      ]
     },
     {
@@ -577,22 +525,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 3/1000 \t Train Err: 32.94102478027344 0.0016151332529261708 21.24653434753418 13.028000831604004 70.40452575683594\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 4/1000 \t Train Err: 30.707416534423828 0.0012646751711145043 27.01055335998535 17.616626739501953 60.88502502441406\n"
+      "Epoch 3/100 \t Train Err: 40.01692616939545 32.29490512609482 22.276952624320984 13.907412678003311 68.378258228302\n"
      ]
     },
     {
@@ -607,52 +540,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 5/1000 \t Train Err: 29.5562801361084 0.0010397899895906448 32.37065124511719 21.991931915283203 53.443416595458984\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 6/1000 \t Train Err: 29.092647552490234 0.0007909027044661343 36.97315979003906 25.812442779541016 47.87839126586914\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 7/1000 \t Train Err: 28.649932861328125 0.0005968677578493953 40.76370620727539 28.99480438232422 43.76168441772461\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 8/1000 \t Train Err: 28.49782943725586 0.00046271493192762136 43.50001907348633 31.30975914001465 41.015228271484375\n"
+      "Epoch 4/100 \t Train Err: 31.416786193847656 2.3271487059355422 26.761382937431335 18.088589638471603 60.98844397068024\n"
      ]
     },
     {
@@ -667,7 +555,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 9/1000 \t Train Err: 28.43338394165039 0.00036097457632422447 45.3854866027832 32.91228103637695 39.220703125\n"
+      "Epoch 5/100 \t Train Err: 25.921700596809387 0.22728093068872113 19.57061032950878 15.32799781858921 52.15890157222748\n"
      ]
     },
     {
@@ -682,7 +570,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 10/1000 \t Train Err: 28.3504581451416 0.0002947713655885309 46.44082260131836 33.81328201293945 38.23492431640625\n"
+      "Epoch 6/100 \t Train Err: 17.580547362565994 0.021982330930768512 4.470714939758182 19.619352281093597 36.34695905447006\n"
      ]
     },
     {
@@ -697,67 +585,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 11/1000 \t Train Err: 28.3637638092041 0.00025288635515607893 44.81169509887695 32.499359130859375 39.33035659790039\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 12/1000 \t Train Err: 20.932743072509766 0.020399289205670357 9.31131649017334 9.292236328125 46.657718658447266\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 13/1000 \t Train Err: 15.104679107666016 0.004516741260886192 3.496262311935425 26.43575096130371 27.609149932861328\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 14/1000 \t Train Err: 11.633755683898926 0.006426772102713585 0.33657196164131165 12.253266334533691 24.989166259765625\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 15/1000 \t Train Err: 8.730537414550781 0.005749554373323917 0.4862631857395172 14.309078216552734 15.44126033782959\n"
+      "Epoch 7/100 \t Train Err: 12.400098770856857 0.013556713653088082 0.5100052966736257 23.03424423933029 23.36328774690628\n"
      ]
     },
     {
@@ -772,22 +600,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 16/1000 \t Train Err: 7.443472385406494 0.006111220922321081 0.20445628464221954 5.455012321472168 16.42494773864746\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 17/1000 \t Train Err: 5.684250354766846 0.0027684608940035105 0.17002306878566742 7.258749961853027 9.979669570922852\n"
+      "Epoch 8/100 \t Train Err: 9.38240310549736 0.007538945435953792 0.22025173716247082 19.04077085852623 16.662322163581848\n"
      ]
     },
     {
@@ -802,7 +615,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 18/1000 \t Train Err: 4.853455543518066 0.002522163325920701 0.1455077826976776 5.698103427886963 8.302152633666992\n"
+      "Epoch 9/100 \t Train Err: 7.412262797355652 0.008171883615432307 0.1555994711816311 14.766773402690887 12.345462799072266\n"
      ]
     },
     {
@@ -817,82 +630,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 19/1000 \t Train Err: 4.262304306030273 0.008678397163748741 0.1446814388036728 6.152136325836182 6.787021636962891\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 20/1000 \t Train Err: 3.858119487762451 0.003093192121013999 0.06649590283632278 3.3348793983459473 7.0024285316467285\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 21/1000 \t Train Err: 3.4118576049804688 0.0032999212853610516 0.08424855768680573 4.119073390960693 5.404160976409912\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 22/1000 \t Train Err: 3.108879566192627 0.0038715973496437073 0.09217671304941177 3.321317434310913 4.763245582580566\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 23/1000 \t Train Err: 2.9745163917541504 0.0034261371474713087 0.042425476014614105 2.484790802001953 5.148287296295166\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 24/1000 \t Train Err: 2.8194165229797363 0.0035489683505147696 0.04434378817677498 2.200214385986328 4.857565879821777\n"
+      "Epoch 10/100 \t Train Err: 6.162406742572784 0.017439005838241428 0.14521901519037783 10.868462145328522 10.12831449508667\n"
      ]
     },
     {
@@ -907,37 +645,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 25/1000 \t Train Err: 2.570242404937744 0.0033989306539297104 0.05114512890577316 1.687565565109253 4.485219478607178\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 26/1000 \t Train Err: 2.519033432006836 0.0032220594584941864 0.037310708314180374 1.374830961227417 4.5875935554504395\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 27/1000 \t Train Err: 2.4283571243286133 0.0033829077146947384 0.05773899331688881 1.9254313707351685 3.3634419441223145\n"
+      "Epoch 11/100 \t Train Err: 5.102278828620911 0.006056587655621115 0.07073448912706226 7.790802486240864 8.321609899401665\n"
      ]
     },
     {
@@ -952,97 +660,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 28/1000 \t Train Err: 2.302886962890625 0.0026804266963154078 0.03176456317305565 1.1421442031860352 4.208995819091797\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 29/1000 \t Train Err: 2.1564438343048096 0.0029864327516406775 0.05358732491731644 1.2556946277618408 3.505455255508423\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 30/1000 \t Train Err: 2.1266884803771973 0.0027816162910312414 0.0484829805791378 1.4444752931594849 2.9733316898345947\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 31/1000 \t Train Err: 2.0390195846557617 0.0022390384692698717 0.03866223618388176 1.1827622652053833 2.866259813308716\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 32/1000 \t Train Err: 1.9310557842254639 0.007157730869948864 0.04426664113998413 1.3611301183700562 2.6592557430267334\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 33/1000 \t Train Err: 1.851065754890442 0.00836748257279396 0.02673536352813244 0.9250126481056213 2.746307373046875\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 34/1000 \t Train Err: 1.8189555406570435 0.002403518883511424 0.027730008587241173 0.7969512939453125 3.080704689025879\n"
+      "Epoch 12/100 \t Train Err: 4.366904504597187 0.005133508995641023 0.059961416525766253 5.993938364088535 6.998718574643135\n"
      ]
     },
     {
@@ -1057,187 +675,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 35/1000 \t Train Err: 1.7624272108078003 0.0018843680154532194 0.0399935357272625 1.2561802864074707 2.1936941146850586\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 36/1000 \t Train Err: 1.6249182224273682 0.004734295420348644 0.031796831637620926 0.9225327372550964 2.0132899284362793\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 37/1000 \t Train Err: 1.567882776260376 0.0021626888774335384 0.023232363164424896 0.6002436876296997 2.547755718231201\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 38/1000 \t Train Err: 1.5313199758529663 0.00217650830745697 0.026524055749177933 0.7098141312599182 2.2419848442077637\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 39/1000 \t Train Err: 1.4404828548431396 0.004386692773550749 0.02224084362387657 0.4259363114833832 2.4182753562927246\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 40/1000 \t Train Err: 1.2733204364776611 0.0018442481523379683 0.018874434754252434 0.42629194259643555 2.229574680328369\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 41/1000 \t Train Err: 1.259386658668518 0.003313510213047266 0.029421737417578697 0.5856508612632751 1.8896589279174805\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 42/1000 \t Train Err: 1.2521576881408691 0.004783857148140669 0.01938166655600071 0.3461814820766449 2.3111233711242676\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 43/1000 \t Train Err: 1.2292133569717407 0.002071063034236431 0.021942850202322006 0.4656696617603302 2.068932056427002\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 44/1000 \t Train Err: 1.128746747970581 0.003524728585034609 0.02287750504910946 0.45642775297164917 1.7306324243545532\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 45/1000 \t Train Err: 1.0967061519622803 0.01454948354512453 0.024551132693886757 0.54127436876297 1.5558075904846191\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 46/1000 \t Train Err: 1.0431199073791504 0.012847564183175564 0.04218384996056557 0.6190078258514404 1.189369559288025\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 47/1000 \t Train Err: 1.0133185386657715 0.0021092321258038282 0.013825979083776474 0.2770753800868988 1.6761137247085571\n"
+      "Epoch 13/100 \t Train Err: 3.831405833363533 0.003056721754546743 0.04472046362934634 4.653905652463436 6.165115922689438\n"
      ]
     },
     {
@@ -1252,7 +690,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 48/1000 \t Train Err: 0.9567054510116577 0.004586610943078995 0.014754511415958405 0.3456564247608185 1.453568696975708\n"
+      "Epoch 14/100 \t Train Err: 3.4279200956225395 0.004735801303468179 0.03825691540259868 3.906827114522457 5.425094351172447\n"
      ]
     },
     {
@@ -1267,7 +705,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49/1000 \t Train Err: 0.9407138228416443 0.0070776138454675674 0.02054545097053051 0.4561208486557007 1.220456838607788\n"
+      "Epoch 15/100 \t Train Err: 3.138390600681305 0.005375595836085267 0.03724290645914152 3.326357141137123 4.948893174529076\n"
      ]
     },
     {
@@ -1282,7 +720,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 50/1000 \t Train Err: 0.9202176332473755 0.0031614142935723066 0.011641060933470726 0.286685585975647 1.5036773681640625\n"
+      "Epoch 16/100 \t Train Err: 2.899445064365864 0.006989890585828107 0.041147086303681135 2.920688170939684 4.558141328394413\n"
      ]
     },
     {
@@ -1297,97 +735,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 51/1000 \t Train Err: 0.8502955436706543 0.004485825542360544 0.014037835411727428 0.261470228433609 1.4015151262283325\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 52/1000 \t Train Err: 0.8055517673492432 0.001586323487572372 0.01207085233181715 0.25895756483078003 1.2997738122940063\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 53/1000 \t Train Err: 0.745665431022644 0.0011599217541515827 0.011456611566245556 0.3580838143825531 1.1415109634399414\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 54/1000 \t Train Err: 0.7061874866485596 0.0031232465989887714 0.011377043090760708 0.3388000428676605 0.9392197728157043\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 55/1000 \t Train Err: 0.6648885607719421 0.0012688999995589256 0.00947241485118866 0.26360955834388733 1.1422845125198364\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 56/1000 \t Train Err: 0.7076109051704407 0.0024704698007553816 0.009398349560797215 0.3038281798362732 1.1086375713348389\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 57/1000 \t Train Err: 0.6785869598388672 0.0014807500410825014 0.01062911655753851 0.2191060185432434 1.1564222574234009\n"
+      "Epoch 17/100 \t Train Err: 2.6482545658946037 0.006691730450256728 0.03093722724588588 2.4344960935413837 4.143990509212017\n"
      ]
     },
     {
@@ -1402,37 +750,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 58/1000 \t Train Err: 0.6877794861793518 0.0014096886152401567 0.009400570765137672 0.30001187324523926 1.0206444263458252\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 59/1000 \t Train Err: 0.6211211681365967 0.0020437357015907764 0.007910181768238544 0.25007426738739014 0.9663841724395752\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 60/1000 \t Train Err: 0.6470574736595154 0.002864431357011199 0.009593289345502853 0.2109789252281189 1.1501246690750122\n"
+      "Epoch 18/100 \t Train Err: 2.4724042862653732 0.013588363406597637 0.03676938998978585 2.2025532834231853 3.8459226489067078\n"
      ]
     },
     {
@@ -1447,7 +765,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 61/1000 \t Train Err: 0.5603711605072021 0.0008608726784586906 0.007078849244862795 0.20478710532188416 0.8139731884002686\n"
+      "Epoch 19/100 \t Train Err: 2.2938634902238846 0.007989803227246739 0.031059849599841982 1.9167942628264427 3.582200199365616\n"
      ]
     },
     {
@@ -1462,7 +780,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 62/1000 \t Train Err: 0.578612208366394 0.0032247111666947603 0.00902671180665493 0.42988908290863037 0.7487800717353821\n"
+      "Epoch 20/100 \t Train Err: 2.0970346182584763 0.004351407576905331 0.02517502213595435 1.652792677283287 3.244159609079361\n"
      ]
     },
     {
@@ -1477,352 +795,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 63/1000 \t Train Err: 0.5186704397201538 0.00102539814542979 0.008104916661977768 0.2222120761871338 0.7046389579772949\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 64/1000 \t Train Err: 0.5231799483299255 0.002828507451340556 0.010938749648630619 0.2966611087322235 0.7127745747566223\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 65/1000 \t Train Err: 0.47884172201156616 0.0009621918434277177 0.006440795958042145 0.18400059640407562 0.704879105091095\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 66/1000 \t Train Err: 0.5025489926338196 0.0009218254708684981 0.010015169158577919 0.31187739968299866 0.6326962113380432\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 67/1000 \t Train Err: 0.43556588888168335 0.0009745100978761911 0.004804776981472969 0.18548405170440674 0.7211376428604126\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 68/1000 \t Train Err: 0.4796810746192932 0.0011300166370347142 0.00498929712921381 0.19455471634864807 0.7402910590171814\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 69/1000 \t Train Err: 0.4702203869819641 0.0010465772356837988 0.00821524765342474 0.19802263379096985 0.6487798094749451\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 70/1000 \t Train Err: 0.4697670042514801 0.0023691540118306875 0.01255676057189703 0.12256406992673874 0.7824207544326782\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 71/1000 \t Train Err: 0.45800718665122986 0.000834575854241848 0.006560661364346743 0.31839612126350403 0.5303618311882019\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 72/1000 \t Train Err: 0.4298030138015747 0.0008242715266533196 0.004054277669638395 0.12216580659151077 0.7027520537376404\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 73/1000 \t Train Err: 0.41886794567108154 0.003492131596431136 0.006118918769061565 0.13266974687576294 0.5819666981697083\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 74/1000 \t Train Err: 0.4155438542366028 0.005384758580476046 0.004661516286432743 0.18202976882457733 0.46242862939834595\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 75/1000 \t Train Err: 0.4029538929462433 0.0027740243822336197 0.004722914192825556 0.11864388734102249 0.6423448920249939\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 76/1000 \t Train Err: 0.40730276703834534 0.0007228117901831865 0.004857086110860109 0.19555920362472534 0.5367923974990845\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 77/1000 \t Train Err: 0.36605364084243774 0.0009539870661683381 0.004075151868164539 0.07661056518554688 0.6930822730064392\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 78/1000 \t Train Err: 0.35346490144729614 0.0008490128675475717 0.011079245246946812 0.2623752951622009 0.35006481409072876\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 79/1000 \t Train Err: 0.36204081773757935 0.0009433329687453806 0.00526107894256711 0.21243207156658173 0.48520198464393616\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 80/1000 \t Train Err: 0.37033170461654663 0.007113362662494183 0.025272028520703316 0.1579497754573822 0.509062647819519\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 81/1000 \t Train Err: 0.33435767889022827 0.0053968350403010845 0.009334199130535126 0.08819041401147842 0.5340674519538879\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 82/1000 \t Train Err: 0.33757588267326355 0.0005324012017808855 0.004930454771965742 0.17805498838424683 0.5068127512931824\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 83/1000 \t Train Err: 0.31310147047042847 0.0008425716659985483 0.002387900371104479 0.0860978364944458 0.5014434456825256\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 84/1000 \t Train Err: 0.2939777672290802 0.0005211451789364219 0.0028972462750971317 0.09632651507854462 0.32632964849472046\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 85/1000 \t Train Err: 0.299526184797287 0.0010479650227352977 0.0028079829644411802 0.21169798076152802 0.40237608551979065\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 86/1000 \t Train Err: 0.30821743607521057 0.0029116582591086626 0.0035962064284831285 0.12347913533449173 0.4878503084182739\n"
+      "Epoch 21/100 \t Train Err: 1.974123526364565 0.003171493917761836 0.02390849226503633 1.4561462700366974 3.0673259645700455\n"
      ]
     },
     {
@@ -1837,112 +810,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 87/1000 \t Train Err: 0.30215075612068176 0.0008221607422456145 0.002995165530592203 0.14785777032375336 0.37114524841308594\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 88/1000 \t Train Err: 0.28064608573913574 0.0016746899345889688 0.0033167945221066475 0.12754225730895996 0.36404597759246826\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 89/1000 \t Train Err: 0.3496437072753906 0.00890024658292532 0.004831824917346239 0.15963110327720642 0.61163729429245\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 90/1000 \t Train Err: 0.25926288962364197 0.0013453153660520911 0.003530274610966444 0.1070982962846756 0.3172129690647125\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 91/1000 \t Train Err: 0.27578499913215637 0.0030208230018615723 0.0031245634891092777 0.11023195832967758 0.3903992176055908\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 92/1000 \t Train Err: 0.2994685769081116 0.004197238944470882 0.0023698117583990097 0.1903829723596573 0.4574669599533081\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 93/1000 \t Train Err: 0.2922815680503845 0.0007029320695437491 0.002348515437915921 0.05019014701247215 0.5161943435668945\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 94/1000 \t Train Err: 0.2519854009151459 0.0017584029119461775 0.002146892249584198 0.09622428566217422 0.3795848488807678\n"
+      "Epoch 22/100 \t Train Err: 1.8447572737932205 0.0030846048357489053 0.021993933914927766 1.3033475428819656 2.8325533121824265\n"
      ]
     },
     {
@@ -1957,307 +825,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 95/1000 \t Train Err: 0.2682843506336212 0.0005638344446197152 0.002339219441637397 0.10693421214818954 0.41891878843307495\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 96/1000 \t Train Err: 0.23551344871520996 0.0013522340450435877 0.002122987760230899 0.07678966224193573 0.39242544770240784\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 97/1000 \t Train Err: 0.22959834337234497 0.0011262426851317286 0.005589182022958994 0.09626860916614532 0.3728175759315491\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 98/1000 \t Train Err: 0.25520408153533936 0.00047998130321502686 0.0021106365602463484 0.06091078370809555 0.46852487325668335\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 99/1000 \t Train Err: 0.2501460909843445 0.0004842414637096226 0.0019760928116738796 0.08570502698421478 0.2884354889392853\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 100/1000 \t Train Err: 0.20366686582565308 0.002971487818285823 0.00713342847302556 0.11135696619749069 0.31001028418540955\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 101/1000 \t Train Err: 0.24212954938411713 0.002736428752541542 0.006694257725030184 0.07518018037080765 0.4043455421924591\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 102/1000 \t Train Err: 0.21833863854408264 0.0023800276685506105 0.0043370905332267284 0.06609099358320236 0.32194095849990845\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 103/1000 \t Train Err: 0.17334935069084167 0.0016206795116886497 0.002039699349552393 0.07832205295562744 0.2227916121482849\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 104/1000 \t Train Err: 0.22861380875110626 0.00032782970811240375 0.0014689437812194228 0.04350364953279495 0.3887456953525543\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 105/1000 \t Train Err: 0.19524142146110535 0.0035212738439440727 0.006021957378834486 0.11795622110366821 0.25324639678001404\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 106/1000 \t Train Err: 0.15101337432861328 0.0010637118248268962 0.006495238747447729 0.07132977992296219 0.20919014513492584\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 107/1000 \t Train Err: 0.16016799211502075 0.0004500410286709666 0.0017898066435009241 0.05250254645943642 0.23433028161525726\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 108/1000 \t Train Err: 0.16674524545669556 0.0010276080574840307 0.004126560874283314 0.11649245023727417 0.1926332712173462\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 109/1000 \t Train Err: 0.1631726324558258 0.000415267248172313 0.0019610195886343718 0.047292474657297134 0.2021452933549881\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 110/1000 \t Train Err: 0.15775498747825623 0.000414436828577891 0.0015739103546366096 0.09467942267656326 0.19835522770881653\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 111/1000 \t Train Err: 0.1562364548444748 0.0061783152632415295 0.0036767187993973494 0.05223577097058296 0.2691049575805664\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 112/1000 \t Train Err: 0.12982504069805145 0.006277484819293022 0.0025301864370703697 0.0412493497133255 0.1602799892425537\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 113/1000 \t Train Err: 0.16289404034614563 0.0005659068119712174 0.0012484622420743108 0.056075569242239 0.21527709066867828\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 114/1000 \t Train Err: 0.1465548872947693 0.0014426918933168054 0.0027563830371946096 0.05711502209305763 0.18701669573783875\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 115/1000 \t Train Err: 0.14275527000427246 0.000653824012260884 0.0014178442070260644 0.0954805463552475 0.21085317432880402\n"
+      "Epoch 23/100 \t Train Err: 1.6970642134547234 0.002785886717902031 0.020618034002836794 1.1004324741661549 2.604609090834856\n"
      ]
     },
     {
@@ -2272,22 +840,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 116/1000 \t Train Err: 0.15016044676303864 0.000818660540971905 0.0016999151557683945 0.06580580770969391 0.1766045093536377\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 117/1000 \t Train Err: 0.12201909720897675 0.0015263869427144527 0.0018288405844941735 0.041804831475019455 0.1663890928030014\n"
+      "Epoch 24/100 \t Train Err: 1.5955667905509472 0.003940593182051089 0.02032866739318706 0.976159205660224 2.466688357293606\n"
      ]
     },
     {
@@ -2302,7 +855,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 118/1000 \t Train Err: 0.12489122897386551 0.00029311463003978133 0.001244824263267219 0.025852475315332413 0.14584743976593018\n"
+      "Epoch 25/100 \t Train Err: 1.4617242440581322 0.002876215428841533 0.018645043048309162 0.8556559775024652 2.229958161711693\n"
      ]
     },
     {
@@ -2317,52 +870,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 119/1000 \t Train Err: 0.13460129499435425 0.010341204702854156 0.006242326460778713 0.04511499032378197 0.2327692061662674\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 120/1000 \t Train Err: 0.1339903473854065 0.0011800267966464162 0.0022739972919225693 0.032896753400564194 0.15254607796669006\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 121/1000 \t Train Err: 0.12022703886032104 0.00038554586353711784 0.004875303246080875 0.06586042791604996 0.1974797397851944\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 122/1000 \t Train Err: 0.11403584480285645 0.0008288199896924198 0.0015265692491084337 0.02858065813779831 0.17847180366516113\n"
+      "Epoch 26/100 \t Train Err: 1.3831324987113476 0.004849277267567231 0.02050616717315279 0.7726096417754889 2.099547818303108\n"
      ]
     },
     {
@@ -2377,22 +885,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 123/1000 \t Train Err: 0.10409620404243469 0.0009630229906179011 0.0013286847388371825 0.07061317563056946 0.12211479991674423\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 124/1000 \t Train Err: 0.09401391446590424 0.0007407785742543638 0.001438300940208137 0.04962582886219025 0.11180052161216736\n"
+      "Epoch 27/100 \t Train Err: 1.311477318406105 0.0024031923712755088 0.01927993548451923 0.7051676390692592 1.976257335394621\n"
      ]
     },
     {
@@ -2407,22 +900,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 125/1000 \t Train Err: 0.11572305113077164 0.0022304926533252 0.0015191843267530203 0.044924236834049225 0.16141481697559357\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 126/1000 \t Train Err: 0.09036736190319061 0.0019903378561139107 0.000955000170506537 0.041499681770801544 0.07043104618787766\n"
+      "Epoch 28/100 \t Train Err: 1.207214828580618 0.0035346322583791334 0.018441367952618748 0.6115700239315629 1.8264076933264732\n"
      ]
     },
     {
@@ -2437,7 +915,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 127/1000 \t Train Err: 0.10687348991632462 0.0012028561905026436 0.0007425333024002612 0.03638176992535591 0.15492156147956848\n"
+      "Epoch 29/100 \t Train Err: 1.1089427508413792 0.00329247322952142 0.0165127256186679 0.5199616495519876 1.6691240929067135\n"
      ]
     },
     {
@@ -2452,142 +930,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 128/1000 \t Train Err: 0.10416512191295624 0.00024147499061655253 0.0009293883922509849 0.028597401455044746 0.18022169172763824\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 129/1000 \t Train Err: 0.08835020661354065 0.003051575506106019 0.003711810801178217 0.05737682431936264 0.120809406042099\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 130/1000 \t Train Err: 0.09325292706489563 0.0029582607094198465 0.0012874709209427238 0.048724617809057236 0.09893109649419785\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 131/1000 \t Train Err: 0.0896429568529129 0.00045463963760994375 0.0013198753586038947 0.035679738968610764 0.10701773315668106\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 132/1000 \t Train Err: 0.08471657335758209 0.0019452492706477642 0.0034233005717396736 0.0296705961227417 0.06480914354324341\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 133/1000 \t Train Err: 0.09226851165294647 0.0015877934638410807 0.002174847759306431 0.02515079453587532 0.13485927879810333\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 134/1000 \t Train Err: 0.08039496093988419 0.0059851668775081635 0.003604266792535782 0.023677635937929153 0.10068632662296295\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 135/1000 \t Train Err: 0.07723499834537506 0.0029561370611190796 0.0013383673503994942 0.02225509285926819 0.07217926532030106\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 136/1000 \t Train Err: 0.08774624764919281 0.0015881237341091037 0.0009627199615351856 0.03728866204619408 0.1386537104845047\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 137/1000 \t Train Err: 0.09112915396690369 0.0011313623981550336 0.000519853550940752 0.02461346611380577 0.09449820965528488\n"
+      "Epoch 30/100 \t Train Err: 1.029175629839301 0.002865927770471899 0.01539153911289759 0.4984441949054599 1.5174495466053486\n"
      ]
     },
     {
@@ -2602,7 +945,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 138/1000 \t Train Err: 0.0764939934015274 0.0015739547088742256 0.002696078270673752 0.02539546601474285 0.07497072219848633\n"
+      "Epoch 31/100 \t Train Err: 0.9538682177662849 0.0023230986480484717 0.01537516585085541 0.4138944335281849 1.406083919107914\n"
      ]
     },
     {
@@ -2617,7 +960,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 139/1000 \t Train Err: 0.08085883408784866 0.0021111860405653715 0.002203426556661725 0.038074932992458344 0.09940771758556366\n"
+      "Epoch 32/100 \t Train Err: 0.8980342578142881 0.0015845673569856444 0.014839567360468209 0.40191424917429686 1.3131387010216713\n"
      ]
     },
     {
@@ -2632,7 +975,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 140/1000 \t Train Err: 0.06707267463207245 0.00048505759332329035 0.0005829626461490989 0.023159010335803032 0.06615001708269119\n"
+      "Epoch 33/100 \t Train Err: 0.8195682223886251 0.0034502551516197855 0.014685693866340443 0.3501896969974041 1.191128795966506\n"
      ]
     },
     {
@@ -2647,142 +990,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 141/1000 \t Train Err: 0.06301475316286087 0.0009192335419356823 0.0005843854160048068 0.03602571412920952 0.0841449722647667\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 142/1000 \t Train Err: 0.06407696008682251 0.004187325481325388 0.0031896685250103474 0.025413284078240395 0.0635743960738182\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 143/1000 \t Train Err: 0.056899648159742355 0.0012612127466127276 0.0010306687327101827 0.021679643541574478 0.05377352237701416\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 144/1000 \t Train Err: 0.07650666683912277 0.00024983700131997466 0.001161872292868793 0.03564540296792984 0.08232379704713821\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 145/1000 \t Train Err: 0.05720144882798195 0.0014120059786364436 0.0017668631626293063 0.03595374524593353 0.06960506737232208\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 146/1000 \t Train Err: 0.0696098804473877 0.00025453948182985187 0.0005893263150937855 0.018764346837997437 0.03399818018078804\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 147/1000 \t Train Err: 0.06046349182724953 0.000990899046882987 0.0004648214380722493 0.02092691697180271 0.02779306285083294\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 148/1000 \t Train Err: 0.059290580451488495 0.0022199612576514482 0.001315723406150937 0.023515846580266953 0.07406013458967209\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 149/1000 \t Train Err: 0.06473296880722046 0.0018222469370812178 0.001621031784452498 0.03560686111450195 0.06342668831348419\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 150/1000 \t Train Err: 0.053641099482774734 0.0003684388066176325 0.0008896152721717954 0.026580102741718292 0.07826311886310577\n"
+      "Epoch 34/100 \t Train Err: 0.7459018900990486 0.0021442237539304188 0.014179074845742434 0.31923429761081934 1.0604591444134712\n"
      ]
     },
     {
@@ -2797,97 +1005,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 151/1000 \t Train Err: 0.05356581509113312 7.923251541797072e-05 0.00026825847453437746 0.018474940210580826 0.06338636577129364\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 152/1000 \t Train Err: 0.053509317338466644 0.000861925829667598 0.0006878876592963934 0.03851037099957466 0.045843761414289474\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 153/1000 \t Train Err: 0.053101420402526855 9.4662478659302e-05 0.0003207987465430051 0.020021196454763412 0.06933460384607315\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 154/1000 \t Train Err: 0.04212021827697754 0.00016945773677434772 0.0002675392315723002 0.01669382117688656 0.040745120495557785\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 155/1000 \t Train Err: 0.038299448788166046 0.0006350624025799334 0.0006876391125842929 0.021383486688137054 0.03851837292313576\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 156/1000 \t Train Err: 0.05666494742035866 0.0003512321272864938 0.00041459291242063046 0.02086237445473671 0.0636894553899765\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 157/1000 \t Train Err: 0.03654496371746063 7.953925523906946e-05 0.00024021716671995819 0.016982462257146835 0.03953758254647255\n"
+      "Epoch 35/100 \t Train Err: 0.7101908139884472 0.0032578711288806517 0.014129422343103215 0.2923557236790657 1.0151417199522257\n"
      ]
     },
     {
@@ -2902,7 +1020,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 158/1000 \t Train Err: 0.04525972530245781 0.00030174560379236937 0.00044638325925916433 0.011691144667565823 0.041540782898664474\n"
+      "Epoch 36/100 \t Train Err: 0.6644597053527832 0.0026260755184921436 0.012881889037089422 0.257190125528723 0.9443178754299879\n"
      ]
     },
     {
@@ -2917,52 +1035,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 159/1000 \t Train Err: 0.039281442761421204 0.0004989005392417312 0.0007457975880242884 0.02650555409491062 0.03530228137969971\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 160/1000 \t Train Err: 0.04518686234951019 0.00017189154459629208 0.00015555098070763052 0.01069786585867405 0.0570724681019783\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 161/1000 \t Train Err: 0.050255514681339264 0.00024805148132145405 0.0007079236092977226 0.02650279738008976 0.06579962372779846\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 162/1000 \t Train Err: 0.044423386454582214 0.00031116121681407094 0.0003777859383262694 0.017297720536589622 0.051457729190588\n"
+      "Epoch 37/100 \t Train Err: 0.6029678452759981 0.0022337357859214535 0.013130559556884691 0.232837010640651 0.837149228900671\n"
      ]
     },
     {
@@ -2977,112 +1050,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 163/1000 \t Train Err: 0.04354090988636017 0.0005877528456039727 0.00029637134866788983 0.013659253716468811 0.020231008529663086\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 164/1000 \t Train Err: 0.031157420948147774 9.798325481824577e-05 0.0003091402177233249 0.01797928847372532 0.023115914314985275\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 165/1000 \t Train Err: 0.04593996703624725 8.030498429434374e-05 0.0002926443121396005 0.013717164285480976 0.055002935230731964\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 166/1000 \t Train Err: 0.03411879763007164 0.0003377409011591226 0.0002743960649240762 0.01241423562169075 0.01924932189285755\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 167/1000 \t Train Err: 0.04760969430208206 0.00011350985732860863 0.00043987829121761024 0.014360723085701466 0.02648620493710041\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 168/1000 \t Train Err: 0.03372378274798393 0.0002949015761259943 0.00037480168975889683 0.014883959665894508 0.016884008422493935\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 169/1000 \t Train Err: 0.03702085092663765 0.00018087019270751625 0.0006288738804869354 0.024845065549016 0.04383993148803711\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 170/1000 \t Train Err: 0.02824767678976059 0.0003424994938541204 0.0004191694315522909 0.015994146466255188 0.014700128696858883\n"
+      "Epoch 38/100 \t Train Err: 0.5509444028139114 0.0019162936450811685 0.012294809013837948 0.2053068270906806 0.7741311714053154\n"
      ]
     },
     {
@@ -3097,157 +1065,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 171/1000 \t Train Err: 0.03430527076125145 0.0009072513785213232 0.0003185459354426712 0.015698766335844994 0.011771176941692829\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 172/1000 \t Train Err: 0.03365296125411987 0.00030303309904411435 0.00022830757370684296 0.017944660037755966 0.02212398685514927\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 173/1000 \t Train Err: 0.041653022170066833 0.000587280432227999 0.0007374075939878821 0.015334094874560833 0.06751907616853714\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 174/1000 \t Train Err: 0.03477595001459122 0.0009183208458125591 0.00013558704813476652 0.013238264247775078 0.01334806066006422\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 175/1000 \t Train Err: 0.037530943751335144 0.00025187639403156936 0.00033183913910761476 0.01504728477448225 0.031140420585870743\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 176/1000 \t Train Err: 0.027848348021507263 0.0014607060002163053 0.001157138729467988 0.011170606128871441 0.01426300685852766\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 177/1000 \t Train Err: 0.03245190531015396 0.0002554329694248736 0.00015877540863584727 0.014040789566934109 0.04714728519320488\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 178/1000 \t Train Err: 0.041977256536483765 0.0001379920431645587 0.00021775254572276026 0.009346511214971542 0.01231997087597847\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 179/1000 \t Train Err: 0.03204528987407684 9.879021672531962e-05 0.0003249702858738601 0.01157579105347395 0.032357051968574524\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 180/1000 \t Train Err: 0.030494939535856247 5.318508192431182e-05 0.0001941232185345143 0.01810402423143387 0.03970681130886078\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 181/1000 \t Train Err: 0.03218014910817146 0.0006771694752387702 0.00016017410962376744 0.009343582205474377 0.03883388638496399\n"
+      "Epoch 39/100 \t Train Err: 0.5144137293100357 0.0014641922125520068 0.009304212289862335 0.18962949723936617 0.6982439709827304\n"
      ]
     },
     {
@@ -3262,67 +1080,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 182/1000 \t Train Err: 0.03439633548259735 8.35641985759139e-05 0.0008245277567766607 0.026584787294268608 0.04398134723305702\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 183/1000 \t Train Err: 0.03437873348593712 0.0003609458508435637 0.0001779649028321728 0.011353005655109882 0.022845609113574028\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 184/1000 \t Train Err: 0.029747625812888145 0.0005436437204480171 0.0006577487220056355 0.02241024561226368 0.03428546339273453\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 185/1000 \t Train Err: 0.03702589124441147 0.001318416208960116 0.0003100436006207019 0.006715381983667612 0.012282946147024632\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 186/1000 \t Train Err: 0.022873839363455772 0.00014723198546562344 0.0011230326490476727 0.015240584500133991 0.020318059250712395\n"
+      "Epoch 40/100 \t Train Err: 0.4911631550639868 0.0034183577554358635 0.011847828980535269 0.17764808260835707 0.6830073017627001\n"
      ]
     },
     {
@@ -3337,127 +1095,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 187/1000 \t Train Err: 0.02688605710864067 0.00014236278366297483 0.00022871489636600018 0.017673302441835403 0.029775310307741165\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 188/1000 \t Train Err: 0.026118462905287743 3.561102130333893e-05 0.00015905339387245476 0.010189777240157127 0.01750301755964756\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 189/1000 \t Train Err: 0.031032036989927292 6.626216054428369e-05 0.0001567322324262932 0.014112681150436401 0.03739028424024582\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 190/1000 \t Train Err: 0.02181524783372879 0.00023006339324638247 0.0005933582433499396 0.010394968092441559 0.00304942368529737\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 191/1000 \t Train Err: 0.027184132486581802 0.004482210148125887 0.0019434703281149268 0.010643635876476765 0.018308935686945915\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 192/1000 \t Train Err: 0.024762475863099098 0.00021026897593401372 0.00068919628392905 0.013836676254868507 0.00759515818208456\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 193/1000 \t Train Err: 0.027485240250825882 5.092989158583805e-05 0.00041440658969804645 0.007257797755300999 0.029338931664824486\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 194/1000 \t Train Err: 0.022330544888973236 0.0004199454269837588 0.0004707501211669296 0.019549088552594185 0.0023543694987893105\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 195/1000 \t Train Err: 0.022276023402810097 6.339305400615558e-05 0.00018617554451338947 0.012746947817504406 0.02517220377922058\n"
+      "Epoch 41/100 \t Train Err: 0.44534157309681177 0.002719711333156738 0.011579871497815475 0.17093974631279707 0.5986328851431608\n"
      ]
     },
     {
@@ -3472,172 +1110,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 196/1000 \t Train Err: 0.02131063863635063 6.503217446152121e-05 0.0001230579655384645 0.009538630954921246 0.010168644599616528\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 197/1000 \t Train Err: 0.021132078021764755 0.0002956095850095153 8.20207133074291e-05 0.006673957221210003 0.010186844505369663\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 198/1000 \t Train Err: 0.019411560148000717 0.00042201197356916964 0.0009095012792386115 0.013239831663668156 0.018353110179305077\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 199/1000 \t Train Err: 0.03216658905148506 4.9469184887129813e-05 0.0002943709841929376 0.01148073747754097 0.016176927834749222\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 200/1000 \t Train Err: 0.02571428008377552 0.0010263347066938877 0.0005481779226101935 0.016015449538826942 0.01978922262787819\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 201/1000 \t Train Err: 0.026272011920809746 3.759583705686964e-05 9.925611811922863e-05 0.008123734965920448 0.01331140287220478\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 202/1000 \t Train Err: 0.03202397748827934 6.046362250344828e-05 0.000284630514215678 0.011763068847358227 0.04103183373808861\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 203/1000 \t Train Err: 0.012851575389504433 0.00011363952944520861 0.00016413710545748472 0.010319913737475872 0.002792573068290949\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 204/1000 \t Train Err: 0.026115499436855316 0.0005245269276201725 0.00024940649745985866 0.011106887832283974 0.02185993455350399\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 205/1000 \t Train Err: 0.018948335200548172 2.8755090170307085e-05 8.58057028381154e-05 0.005748868454247713 0.015843190252780914\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 206/1000 \t Train Err: 0.018574167042970657 0.00015149133105296642 0.00048761311336420476 0.02577385865151882 0.013325332663953304\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 207/1000 \t Train Err: 0.02082146145403385 0.000238466338487342 0.00023394331219606102 0.006748242769390345 0.019078979268670082\n"
+      "Epoch 42/100 \t Train Err: 0.4234167579561472 0.0020350335244074813 0.010087796414154582 0.1561200835276395 0.5679095359519124\n"
      ]
     },
     {
@@ -3652,37 +1125,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 208/1000 \t Train Err: 0.014403259381651878 6.994893192313612e-05 0.0008789921994321048 0.010097990743815899 0.00996581930667162\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 209/1000 \t Train Err: 0.020921753719449043 0.0004979136865586042 0.00010895056766457856 0.00956189725548029 0.015378139913082123\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 210/1000 \t Train Err: 0.022038539871573448 0.0001340815215371549 0.00015701379743404686 0.014402204193174839 0.02424498274922371\n"
+      "Epoch 43/100 \t Train Err: 0.38217254262417555 0.001672563766987878 0.008801718227914535 0.1451707179658115 0.5035076225176454\n"
      ]
     },
     {
@@ -3690,396 +1133,21 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 211/1000 \t Train Err: 0.01667097397148609 0.00011134350643260404 0.00014891373575665057 0.00600818358361721 0.009433449245989323\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 212/1000 \t Train Err: 0.01602860540151596 4.298926069168374e-05 7.710252975812182e-05 0.004729713778942823 0.005112841725349426\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 213/1000 \t Train Err: 0.01903412863612175 0.00030714491731487215 0.0001821738842409104 0.005955686792731285 0.022314537316560745\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 214/1000 \t Train Err: 0.015248223207890987 0.00026072614127770066 0.00019891293777618557 0.007477042265236378 0.0036678414326161146\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 215/1000 \t Train Err: 0.014912809245288372 0.00047885856474749744 0.0004725187609437853 0.00442067626863718 0.006714401766657829\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 216/1000 \t Train Err: 0.022600673139095306 5.488095121108927e-05 0.0005237676086835563 0.0061714984476566315 0.03166600689291954\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 217/1000 \t Train Err: 0.022118201479315758 8.072196214925498e-05 0.00010541417577769607 0.00999145582318306 0.022547846660017967\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 218/1000 \t Train Err: 0.01601581647992134 3.067262514377944e-05 0.00018664993694983423 0.004181408789008856 0.013367431238293648\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 219/1000 \t Train Err: 0.01162786316126585 5.145326576894149e-05 0.00015623572107870132 0.006794607732445002 0.001922846888191998\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 220/1000 \t Train Err: 0.012896351516246796 0.0003044742625206709 9.53769194893539e-05 0.006998015101999044 0.003260016907006502\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 221/1000 \t Train Err: 0.023554792627692223 0.00018602493219077587 0.0004451847053132951 0.011047394014894962 0.03432558849453926\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 222/1000 \t Train Err: 0.016859270632267 0.0002703650388866663 0.00010159210069105029 0.0054770237766206264 0.013049724511802197\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 223/1000 \t Train Err: 0.018101878464221954 5.2419105486478657e-05 6.96199931553565e-05 0.005247439723461866 0.011564591899514198\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 224/1000 \t Train Err: 0.015453542582690716 0.00010983451647916809 0.00022485408408101648 0.005099967587739229 0.012966718524694443\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 225/1000 \t Train Err: 0.018901851028203964 4.1673782106954604e-05 0.0015293046599254012 0.00927029736340046 0.015386526472866535\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 226/1000 \t Train Err: 0.019163722172379494 0.0016058672918006778 0.002710553817451 0.003990732133388519 0.013572349213063717\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 227/1000 \t Train Err: 0.0185165386646986 8.810655708657578e-05 0.0001792146940715611 0.007222854066640139 0.020297806710004807\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 228/1000 \t Train Err: 0.020369376987218857 0.0009141123155131936 0.001427489914931357 0.008912032470107079 0.02442525327205658\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 229/1000 \t Train Err: 0.01552458293735981 0.00011080846888944507 6.443277379730716e-05 0.004490815103054047 0.00922235008329153\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 230/1000 \t Train Err: 0.0195521991699934 2.673462404345628e-05 0.0002968674525618553 0.0058441669680178165 0.013992007821798325\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 231/1000 \t Train Err: 0.01986689493060112 4.5369306462816894e-05 4.316281047067605e-05 0.005999550223350525 0.015800679102540016\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 232/1000 \t Train Err: 0.019927091896533966 0.00010001207556342706 0.0014023327967152 0.0054413750767707825 0.027315480634570122\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.05it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 233/1000 \t Train Err: 0.014753343537449837 0.0003090432728640735 8.334611629834399e-05 0.004138716962188482 0.004037454724311829\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:31<00:00,  1.03it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 234/1000 \t Train Err: 0.012261731550097466 0.0003884017060045153 0.00043851000373251736 0.00813040230423212 0.007334974128752947\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 235/1000 \t Train Err: 0.011006608605384827 0.0003388413751963526 0.00021402201673481613 0.005710902623832226 0.008260917849838734\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.04it/s]"
+      "00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:30<00:00,  1.03it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 236/1000 \t Train Err: 0.00900082103908062 0.0020744148641824722 0.0004009941185358912 0.005855425260961056 0.000494154985062778\n"
+      "Epoch 44/100 \t Train Err: 0.36583498027175665 0.0006311295665000216 0.0070743790856795385 0.13195386109873652 0.4824865907430649\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "  6%|████████████▌                                                                                                                                                                                           | 2/32 [00:01<00:30,  1.00s/it]"
+      " 69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                              | 22/32 [00:21<00:09,  1.06it/s]"
      ]
     }
    ],
@@ -4108,7 +1176,8 @@
     "        \n",
     "        with open('loss', 'a') as f:\n",
     "            f.write(f\"{train_err[-1]} {len1[-1]} {len2[-1]} {len3[-1]} {leninf[-1]}\\n\")\n",
-    "    print(f\"Epoch {epoch}/{NEPOCHS} \\t Train Err: {train_err[-1]} {len1[-1]} {len2[-1]} {len3[-1]} {leninf[-1]}\")\n",
+    "    epoch_err = lambda l: sum(l[-EPOCH_SIZE // BSZ:]) / EPOCH_SIZE * BSZ\n",
+    "    print(f\"Epoch {epoch}/{NEPOCHS} \\t Train Err: {epoch_err(train_err)} {epoch_err(len1)} {epoch_err(len2)} {epoch_err(len3)} {epoch_err(leninf)}\")\n",
     "\n",
     "    epoch += 1\n",
     "    if epoch % 10 == 0:\n",
@@ -4118,7 +1187,6 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -4148,7 +1216,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -4174,21 +1241,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
-   "execution_state": "idle",
+   "execution_count": 1,
    "metadata": {
     "id": "LoGEmM5lH7_A"
    },
    "outputs": [
     {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhIAAAGlCAYAAACvGh/vAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAqQElEQVR4nO3df3RU9Z3/8ddkIgPFJBpYEgYTSVsUA5goBFbwu8Kab/mmCGV7/NUvhRTPwVYHAdNjge0CWpWIntJUmy+IexTbryju2UJdPKWyWSS6VQhJ47fuVn6cRoywSWRbMxCXgHPv9w/JrJEL3Nx7585k8nyc8zk9c3PvfN5XK3nz/vwKmKZpCgAAwIGMZAcAAAD6LxIJAADgGIkEAABwjEQCAAA4RiIBAAAcI5EAAACOkUgAAADHSCQAAIBjJBIAAMAxEgkAAOAYiQQAAHCMRAIAgDS2Y8cOXX311RozZoz+/u//3vPvD3BoFwAA6enTTz9VcXGxdu/erZycHE2cOFG//e1vNWzYMM/6oCIBAECa2rdvn8aNG6dRo0bp0ksvVUVFhV577TVP+yCRAAAgRdXX12v27NkKh8MKBALavn37OffU1tZq9OjRGjx4sKZMmaJ9+/bFf3bs2DGNGjUq/nnUqFE6evSopzGSSAAAkKK6urpUUlKi2tpay59v3bpVVVVVWrNmjZqamlRSUqKZM2eqo6PDtxgzfevJJsMwdOzYMWVlZSkQCCQ7HABACjNNUydOnFA4HFZGRuL+bnzq1CmdPn3a9feYpnnO77ZQKKRQKGR5f0VFhSoqKs77fevXr9eiRYu0cOFCSdLGjRv16quv6tlnn9WKFSsUDod7VSCOHj2qyZMnu36PXswU09raakqi0Wg0Gs12a21tTdjvpf/6r/8y80cEPYnz0ksvPefamjVrbMUhydy2bVv8c3d3txkMBntdM03TXLBggTlnzhzTNE3zzJkz5le/+lXzww8/NE+cOGFeddVV5vHjxz3955NyFYmsrCxJUmtrq7Kzs5MdDgDAIaP9uoT3ET1p6Mrr34//7kiE06dPq60jppbGK5Wd5bzqET1hqGjikXN+v52vGnExx48fVywWU15eXq/reXl5eu+99yRJmZmZ+vGPf6wZM2bIMAz94Ac/8HTFhlJxaKOn5JOdnU0iAQD9mPFJ0Le+/BgKz87KcJVIxL/H599vc+bM0Zw5cxL2/SmXSAAAkIpipqGY6e55Lw0fPlzBYFDt7e29rre3tys/P9/Tvi6EVRsAANhgyHTdvDRo0CBNnDhRdXV1/x2jYaiurk433HCDp31dCBUJAABsMGTITU2h5+mysjIFg0FFIhFFIpELPnPy5EkdPnw4/rmlpUXNzc3Kzc1VYWGhqqqqVFlZqUmTJmny5MmqqalRV1dXfBWHH0gkAADwUUNDg+05Evv379eMGTPin6uqqiRJlZWV2rx5s+644w599NFHWr16tdra2lRaWqqdO3eeMwEzkVLurI1oNKqcnBx1dnYy2RIAcEF+/M7o6aP1vVGuV20UjD2adr/fqEgAAGCD23kOXs+RSBVMtgQAAI5RkQAAwAZDpmJUJM5BRQIAABu8Wv5ZVlam4uLi8x7E1d9QkQAAwEd9WbXRH5BIAABgQ8w0FXOx0NHNs6mMRAIAABuMs83N8+mIORIAAMAxKhIAANgQc7lqw82zqYyKBAAANsRM902s2gAAYGDyao5Euq3aoCIBAAAcoyIBAIANhgKKKeDq+XREIgEAgA2G+Vlz83w66vPQRn19vWbPnq1wOKxAIKDt27ef997vfe97CgQCqqmpcRsnAABIQX1OJLq6ulRSUnLR2abbtm3T22+/rXA47CY+AABSQuzs0Iablo76PLRRUVGhioqKC95z9OhR3XffffrNb36jWbNmuYkPAICU4DYZSNdEwvNVG4ZhaP78+XrggQc0bty4i97f3d2taDTaqwEAkK7YR+Ii1q1bp8z
MTC1ZssTW/dXV1XrooYe8DgMAAE8ZZkCG6WLVxtln2UfiAhobG/XTn/5UmzdvViBg7x/2ypUr1dnZGW+tra1ehgQAgCeYI2HN00TijTfeUEdHhwoLC5WZmanMzEwdOXJE3//+9zV69GjLZ0KhkLKzs3s1AADQP3g6tDF//nyVl5f3ujZz5kzNnz9fCxcu9LIrAAB8FVOGYi7+/h3zNJrU0edE4uTJkzp8+HD8c0tLi5qbm5Wbm6vCwkINGzas1/2XXHKJ8vPzdfXVV3sTMQAASWC6nCNhung2lfU5kdi/f79mzJgR/1xVVSVJqqys1ObNm72NDgCAFMHyT2t9TiSmT58u07S/z+f777/f1y4AAEA/wVkbAADYEDMzFDNdzJHgrA0AAAYuQwEZynDRPhvaYEMqAADgWLptSEUiAQCADUy2tEYiAQCADe7nSKTnJAnmSAAAAMeoSAAAYMNnky1dHNrF0AYAAAOX4XKLbEMMbQAAAPRCRQIAABuYbGmNRAIAABt6NpZy/jyJBAAAA1bMDCjm4gRPN8+mMuZIAADgI7bIBgBgAIq5XLUROzu0wRbZAAAMQIaZIcPFZEsjTSdbMrQBAAAcoyIBAIANXg1tpBsSCQAAbDBcrrwwPI0mdTC0AQAAHKMiAQCADe43pErPv7uTSAAAYIP7LbLTM5FIz7cCAAC+oCIBAIANhgIy5GayZXpukU0iAQCADQxtWCORAADABvf7SKRnIpGebwUAAHxBRQIAABsMMyDDzYZUHCMOAMDAZZwd2nDaevaR4BhxAADgGMeIAwAwALk/Rjw9BwFIJAAAsCGmgGIu9oJw82wqS8/0CAAA+IKKBAAANjC0YY1EAgAAG2IuhydinkaTOtIzPQIAAL6gIgEAgA0MbVgjkQAAwAYO7bJGIgEAgA2my2PETZZ/AgAA9EZFAgAAGxjasEYiAQCADZz+aS090yMAAOCLPicS9fX1mj17tsLhsAKBgLZv3x7/2ZkzZ7R8+XJNmDBBQ4cOVTgc1oIFC3Ts2DGv4wYAwFdujhDvaemoz2/V1dWlkpISy3PUP/nkEzU1NWnVqlVqamrSL3/5Sx04cEBz5szxKl4AAJKiZ2jDTUtHfZ4jUVFRoYqKCsuf5eTkaNeuXb2u/exnP9PkyZP1wQcfqLCw0HmkAAAg5SR8smVnZ6cCgYAuu+wyy593d3eru7s7/jkajSY6JAAA+sxQhgwXwxNunk1lCX2rU6dOafny5frWt76l7Oxsy3uqq6uVk5MTbwUFBYkMCQAAR2JmwHVLRwlLJM6cOaPbb79dpmlqw4YN571v5cqV6uzsjLfW1tZEhQQAQNKVlZWpuLjYcq5hf5SQoY2eJOLIkSP6l3/5l/NWIyQpFAopFAolIgwAADzj1T4SDQ0NF/y92N94nkj0JBGHDh3S7t27NWzYMK+7AADAd6bL0z9Ndrb8zMmTJ3X48OH455aWFjU3Nys3N1cjR47UrbfeqqamJu3YsUOxWExtbW2SpNzcXA0aNMjb6AEA8ElMAcVcHLzl5tlU1udEYv/+/ZoxY0b8c1VVlSSpsrJSDz74oF555RVJUmlpaa/ndu/erenTp7uPGAAApIw+JxLTp0+XaZrn/fmFfgYAQH9lmO7OyzDS9Ncjh3YBAGCD4XKOhJtnU1l6vhUAAPAFFQkAAGwwFJDhYsKkm2dTGYkEAAA2uN2dkp0tAQAAvoCKBAAANjDZ0hqJBAAANhhyuUV2ms6RSM/0CAAA+IKKBAAANpguV22YaVqRIJEAAMAGr07/TDckEgAA2MBkS2vp+VYAAMAXVCQAALCBoQ1rJBIAANjAFtnWGNoAAACOUZEAAMAGhjaskUgAAGADiYQ1hjYAAIBjVCQAALCBioQ1EgkAAGwgkbDG0AYAAHCMigQAADaYLveCMD2NJnVQkQAAwIaeoQ03LRX9zd/8jS6//HLdeuutjp4nkQAAwIZ0TSSWLl2qn//8546fJ5EAAGAAmz59urKyshw
/TyIBAIANyahI1NfXa/bs2QqHwwoEAtq+ffs599TW1mr06NEaPHiwpkyZon379nn0xvYw2RIAABuSsfyzq6tLJSUluuuuu/TNb37znJ9v3bpVVVVV2rhxo6ZMmaKamhrNnDlTBw4c0IgRIyRJpaWl+vTTT8959rXXXlM4HHb4Nv+NRAIAAB9Fo9Fen0OhkEKhkOW9FRUVqqioOO93rV+/XosWLdLChQslSRs3btSrr76qZ599VitWrJAkNTc3exr/FzG0AQCADaYZcN0kqaCgQDk5OfFWXV3tKJ7Tp0+rsbFR5eXl8WsZGRkqLy/XW2+95dl7XwwVCQAAbDAUcLWPRM+zra2tys7Ojl8/XzXiYo4fP65YLKa8vLxe1/Py8vTee+/Z/p7y8nK988476urq0hVXXKF/+Id/0A033GD7eRIJAAB8lJ2d3SuRSLZ//ud/dvU8iQQAADak2lkbw4cPVzAYVHt7e6/r7e3tys/P97SvC2GOBAAANng1R8IrgwYN0sSJE1VXVxe/ZhiG6urq+jQ04RYVCQAAfFRWVqZgMKhIJKJIJHLBe0+ePKnDhw/HP7e0tKi5uVm5ubkqLCxUVVWVKisrNWnSJE2ePFk1NTXq6uqKr+LwA4kEAAA2eDW00dDQYHuOxP79+zVjxoz456qqKklSZWWlNm/erDvuuEMfffSRVq9erba2NpWWlmrnzp3nTMBMJBIJAABscDs84eTZ6dOnyzQvfG7o4sWLtXjxYsdxuUUiAQCADabLioTXcyRSBZMtAQCAY1QkAACwwZR0kVGGiz6fjqhIAABgQ8/Olm6azq7aKC4uVm1tbbJfyRNUJAAA8FFfVm30B32uSFzsbHTTNLV69WqNHDlSQ4YMUXl5uQ4dOuRlzAAA+C7VNqRKFX1OJHrORj9fSebxxx/Xk08+qY0bN2rv3r0aOnSoZs6cqVOnTnkRLwAASdGzj4Sblo76PLRxobPRTdNUTU2N/u7v/k7f+MY3JEk///nPlZeXp+3bt+vOO+90HzEAAEgZnk62bGlpUVtbW6+z0XNycjRlyhRfz0YHAMBrpum+pSNPJ1u2tbVJZ89C/7y8vLz4z76ou7tb3d3d8c/RaNTLkAAA8EQydrbsD5K+/LO6ulo5OTnxVlBQkOyQAABIGJZ/XkDP+eft7e0aOXJk/Hp7e7tKS0stn1m5cmX8EBKdrUiQTAD9j9F2lW99ZeQf9K0voIdXFYkBv/zzQoqKipSfn9/rbPRoNKq9e/ee92z0UCik7OzsXg0AgFTDqg1rfa5IXOxs9GXLlumRRx7RmDFjVFRUpFWrVikcDmvu3Llexw4AgG/cTphksuVZFzsb/Qc/+IG6urp099136+OPP9aNN96onTt3avDgwd5GDgAAkq7PicTFzkYPBAL60Y9+pB/96EduYwMAIGV8VpFwM0fC03BSBmdtAABgA8s/rSV9+ScAAAMJyz8BwIKfSzL9WmrKMlN8nnm2uXleabj8k0QCAAAbGNqwxtAGAABwjIoEAAB2eDW2kWZIJAAAsMPl0IbSdGiDRAIAABvY2dIacyQAAIBjVCQAALCBVRvWSCQA9Dvs74CkMAPu5jmkaSLB0AYAAD5iZ0sAAAYgryZbsrMlAAADEftIWGJoAwAAOEZFAgAAG1i1YY1EAgAAu9J0eMINEgkgzb3x/ld96ed/jD7sSz8AUguJBAAANjC0YY1EAgAAO1i1YYlEAgAAWwJnm5vn0w/LPwEAgGMkEgAA2GF60NgiGwCAAcqjORJskQ2gX2FZJoBEIpEAAMAOjhG3RCIBAIANXp3+mW6YbAkAAByjIgEAgB1sSGWJRAIAADuYI2GJoQ0AAOAYFQkAAGwImJ81N8+nIxIJIM09+m+3+NLPD8ft8KUfIGmYI2GJRAIAADuYI2GJORIAAMAxKhIAANjB0IYlKhIAANjB6Z+WqEgAAOAjTv8EAGAgYmjDEokE8DljH/yJL/3cdftrvvTzmaCPfQFpjFUblpg
jAQAAHKMiAQCADexsac3zikQsFtOqVatUVFSkIUOG6Ctf+Yoefvhhmel6EDsAYGDwaNVGuvG8IrFu3Tpt2LBBzz//vMaNG6f9+/dr4cKFysnJ0ZIlS7zuDgAAJJHnicRvf/tbfeMb39CsWbMkSaNHj9aLL76offv2ed0VAABIMs+HNqZOnaq6ujodPHhQkvTOO+/ozTffVEVFheX93d3dikajvRoAAKkm8Ll5Eo5asl8gQTyvSKxYsULRaFRjx45VMBhULBbTo48+qnnz5lneX11drYceesjrMAAA8BbLPy15nki8/PLLeuGFF7RlyxaNGzdOzc3NWrZsmcLhsCorK8+5f+XKlaqqqop/jkajKigo8DoswJb3Hrzfl36ePrDdl34k6btX7/GtLwADj+eJxAMPPKAVK1bozjvvlCRNmDBBR44cUXV1tWUiEQqFFAqFvA4DAABvsbOlJc8TiU8++UQZGb2nXgSDQRmG4XVXAAD4h0TCkueJxOzZs/Xoo4+qsLBQ48aN0+9+9zutX79ed911l9ddAQCAJPM8kXjqqae0atUq3Xvvvero6FA4HNZ3v/tdrV692uuuAADwDTtbWvM8kcjKylJNTY1qamq8/moAAJKHoQ1LHNoFAAAc49Au4HNG/+IxX/q5d+KXfOkHgIeoSFgikQAAwAbmSFhjaAMAAB+VlZWpuLhYtbW1yQ7FE1QkAACww6MtshsaGpSdne1dXElGIgEAgB3MkbBEIgEAgA3MkbDGHAkAAOAYFQkgCf7tZDjZIQDoK4Y2LJFIAABgh8uhjXRNJBjaAAAAjlGRAADADoY2LJFIAABgB4mEJYY2AACAY1QkAACwgX0krFGRAAAAjlGRAD7n/9z4f33p5+tF7/rSDwAkGokEAAB2MNnSEokEAAA2MEfCGokEAAB2pWky4AaTLQEAgGNUJAAAsIM5EpZIJAAAsIE5EtZIJIDPaTtzWbJDAIB+hUQCAAA7GNqwRCIBAIANDG1YY9UGAABwjIoEAAB2MLRhiUQCAAA7SCQsMbQBAAAcoyIBAIANTLa0RiKBfuGa1T/xpZ+8m/7Sl37uusqXbgB4KQ2HNlpbWzV//nx1dHQoMzNTq1at0m233dan7yCRAADAjjRMJDIzM1VTU6PS0lK1tbVp4sSJ+vrXv66hQ4fa/46ERggAAFLWyJEjNXLkSElSfn6+hg8frj/96U99SiSYbAkAgA09cyTctL6qr6/X7NmzFQ6HFQgEtH379nPuqa2t1ejRozV48GBNmTJF+/btc/R+jY2NisViKigo6NNzJBIAANhhetD6qKurSyUlJaqtrbX8+datW1VVVaU1a9aoqalJJSUlmjlzpjo6OuL3lJaWavz48ee0Y8eOxe/505/+pAULFmjTpk19jpGhDQAAUlRFRYUqKirO+/P169dr0aJFWrhwoSRp48aNevXVV/Xss89qxYoVkqTm5uYL9tHd3a25c+dqxYoVmjp1ap9jpCIBAIANXg1tRKPRXq27u9tRPKdPn1ZjY6PKy8vj1zIyMlReXq633nrL1neYpqnvfOc7+uu//mvNnz/fURxUJNA/XBf1pZv3D+T70o9u9qcbAB7yaNXGF+cgrFmzRg8++GCfv+748eOKxWLKy8vrdT0vL0/vvfeere/413/9V23dulXXXnttfP7FL37xC02YMMF2HCQSAAD4qLW1VdnZ2fHPoVAoabHceOONMgzD1XeQSAAAYIdHFYns7OxeiYRTw4cPVzAYVHt7e6/r7e3tys/3qbrKHAkAAOwJeNC8NGjQIE2cOFF1dXXxa4ZhqK6uTjfccIPHvZ1fQhKJo0eP6tvf/raGDRumIUOGaMKECdq/f38iugIAIG2dPHlSzc3N8ZUXLS0tam5u1gcffCBJqqqq0jPPPKPnn39ef/jDH3TPPfeoq6srvorDD54Pbfz5z3/WtGnTNGPGDP3617/WX/zFX+jQoUO6/PLLve4KAAD/eDS0UVZWpmAwqEgkokg
kcsFH9u/frxkzZsQ/V1VVSZIqKyu1efNm3XHHHfroo4+0evVqtbW1qbS0VDt37jxnAmYieZ5IrFu3TgUFBXruuefi14qKirzuBgAAX3l1+mdDQ4PtORLTp0+XaV6408WLF2vx4sXOA3PJ80TilVde0cyZM3Xbbbdpz549GjVqlO69914tWrTI8v7u7u5ea2ijUX+W+aF/2f+Xz/rSz9CRR3zpB0A/lIaHdnnB8zkSf/zjH7VhwwaNGTNGv/nNb3TPPfdoyZIlev755y3vr66uVk5OTrz1dY9vAACQPJ4nEoZh6Prrr9fatWt13XXX6e6779aiRYu0ceNGy/tXrlypzs7OeGttbfU6JAAAvOHjORv9hedDGyNHjlRxcXGva9dcc43+8R//0fL+UCiU1M04AACww6s5EunG84rEtGnTdODAgV7XDh48qCuvvNLrrgAA6HfKyspUXFx83hM9+xvPKxL333+/pk6dqrVr1+r222/Xvn37tGnTJkdHkwIAkDI8mmzZl1Ub/YHnFYmysjJt27ZNL774osaPH6+HH35YNTU1mjdvntddAQDgG69O/0w3CTlr45ZbbtEtt9ySiK8GAAAphEO74Nj0/7XOt77e/9/3+tPPAl+6AdAfsY+EJRIJAABsYNWGNU7/BADAR6zaAABgIGLVhiUSCQAA7GCOhCUSCQAAbGCOhDXmSAAAAMeoSMCxzi8P8q2v9xfc71tfAGCJoQ1LJBIAANgQME0FTOfZgJtnUxlDGwAAwDESCQAA7DA9aOwjAQDAwOTVqo1020eCigQAAHCMigQAAHawasMSiQQca1jt3/jetT6t/vx/P2GZKQBrbEhljaENAADgGBUJAADsYGjDEokEAAA2MLRhjUQCAAA7qEhYYo4EAAA+YkMqAAAGKC+GJ9JtQyoSCQAA7DDNz5qb59MQiUQa+p8Zt/nSzwcPRXzpR5IOsL8DAKQkEgkAAGxg1YY1EgkAAOxg1YYlVm0AAADHqEgAAGBDwPisuXk+HZFIAABgB0MblhjaAAAAjlGRSEMZ14/zpZ+il4/70o8kaZV/XQGAFVZtWKMiAQCAHT0bUrlpbJENAMDA5FVFIt22yKYiAQAAHKMiAQCAHazasEQiAQCADUy2tMbQBgAAcIyKBAAAdnCMuCUSiTT06aWDfOnnku4zvvQDAKmAoQ1rDG0AAADHqEgAAGAHqzYskUgAAGADQxvWEj608dhjjykQCGjZsmWJ7goAAPgsoRWJhoYGPf3007r22msT2Q0AAIlnmJ81N8+noYRVJE6ePKl58+bpmWee0eWXX56obgAA8IfpQUtDCatIRCIRzZo1S+Xl5XrkkUcS1Q0s7Hppsy/9ZOQf9KUfAEgFAZfzHAJn/7esrEzBYFCRSESRSMSr8JImIYnESy+9pKamJjU0NFz03u7ubnV3d8c/R6PRRIQEAEBK4PTPi2htbdXSpUv1wgsvaPDgwRe9v7q6Wjk5OfFWUFDgdUgAALjXs7Olm5aGPE8kGhsb1dHRoeuvv16ZmZnKzMzUnj179OSTTyozM1OxWKzX/StXrlRnZ2e8tba2eh0SAACu9Sz/dNPSkedDGzfffLN+//vf97q2cOFCjR07VsuXL1cwGOz1s1AopFAo5HUYAADAB54nEllZWRo/fnyva0OHDtWwYcPOuQ4AQL/BzpaW2NkSAAAbAqapgIt5Dm6eTWW+JBKvv/66H93grJnhEl/62WX40g0AIIVRkQAAwA7jbHPzfBoikQAAwAaGNqwl/NAuAACQvqhIAABgB6s2LJFIAABgh9vdKdN0aINEAgAAG9zuTpmuO1syRwIAADhGRQIAADsY2rBEIgEAgA0B47Pm5vl0xNAGAABwjIoEAAB2MLRhiUQCAAA72EfCEkMbAAD4qKysTMXFxaqtrU12KJ6gIgEAgA1enbXR0NCg7OxsDyNLLhIJnxhtV/nW1y7joG99AcCAwRwJSwxtAAAAx6hIAABghynJzV4Q6VmQIJE
AAMAOr+ZIpBsSCQAA7DBdznNIzzyCORIAAMA5KhIAANjBqg1LJBIAANhhSAq4fD4NkUj4JCOfvR0AAOmHRAIAABtYtWGNRAIAADuYI2GJVRsAAMAxKhIAANhBRcISiQQAAHaQSFhiaAMAADhGRQIAADvYR8ISiQQAADaw/NMaiQQAAHYwR8IScyQAAIBjVCQAALDDMKWAi6qCkZ4VCRIJAADsYGjDEkMbAADAMSoSAADY4rIiofSsSJBIAABgB0MblhjaAAAAjlGRAADADsN0NzzBqg0AAAYw0/isuXk+DTG0AQDAAPXxxx9r0qRJKi0t1fjx4/XMM8/0+Ts8TySqq6tVVlamrKwsjRgxQnPnztWBAwe87gYAAH/1TLZ001JMVlaW6uvr1dzcrL1792rt2rX6z//8zz59h+eJxJ49exSJRPT2229r165dOnPmjL72ta+pq6vL664AAPCPYbpvKSYYDOpLX/qSJKm7u1umacrsY8Lj+RyJnTt39vq8efNmjRgxQo2Njfqrv/orr7tzzWi7ypd+MvIP+tIPACBBkrD8s76+Xk888YQaGxv1H//xH9q2bZvmzp3b657a2lo98cQTamtrU0lJiZ566ilNnjzZdh8ff/yxbrrpJh06dEhPPPGEhg8f3qcYEz5HorOzU5KUm5tr+fPu7m5Fo9FeDQAASF1dXSopKVFtba3lz7du3aqqqiqtWbNGTU1NKikp0cyZM9XR0RG/p2f+wxfbsWPHJEmXXXaZ3nnnHbW0tGjLli1qb2/vU4wJXbVhGIaWLVumadOmafz48Zb3VFdX66GHHkpkGAAAuGe63FTq7KNf/AtzKBRSKBSyfKSiokIVFRXn/cr169dr0aJFWrhwoSRp48aNevXVV/Xss89qxYoVkqTm5mZb4eXl5amkpERvvPGGbr31VrtvldiKRCQS0bvvvquXXnrpvPesXLlSnZ2d8dba2prIkAAAcMajyZYFBQXKycmJt+rqakfhnD59Wo2NjSovL49fy8jIUHl5ud566y1b39He3q4TJ05IZ0cQ6uvrdfXVV/cpjoRVJBYvXqwdO3aovr5eV1xxxXnvu1AmBgBAumltbVV2dnb8s9PfgcePH1csFlNeXl6v63l5eXrvvfdsfceRI0d09913xydZ3nfffZowYUKf4vA8kegJZNu2bXr99ddVVFTkdRcAAPjPMCS52FTK+OzZ7OzsXolEMk2ePNn20Mf5eJ5IRCIRbdmyRb/61a+UlZWltrY2SVJOTo6GDBnidXcAAPgjxQ7tGj58uILB4DmTI9vb25Wfn+9pXxfieSKxYcMGSdL06dN7XX/uuef0ne98x+vuXGNZJgCgPxo0aJAmTpyourq6+JJQwzBUV1enxYsX+xZHQoY2AABIOx5VJMrKyhQMBhWJRBSJRC74yMmTJ3X48OH455aWFjU3Nys3N1eFhYWqqqpSZWWlJk2apMmTJ6umpkZdXV3xVRx+4NAuAADs8Oj0z4aGBttzJPbv368ZM2bEP1dVVUmSKisrtXnzZt1xxx366KOPtHr1arW1tam0tFQ7d+48ZwJmIpFIAACQoqZPn37RSv/ixYt9Hcr4IhIJAABsME1DpoujwN08m8pIJAAAsMN0efBWms4hJJEAAMAO0+UciTRNJBJ+aBcAAPhvZWVlKi4uPu9BXP0NFQkAAOwwDCngYp7D2TkSfVm10R+QSAAAYAdDG5YY2gAAAI5RkQAAwAbTMGS6GNpg+ScAAAMZQxuWGNoAAACOkUgAAGCHYbpvLP/0j9F+nYxPggnvh2PEAQC2mKYkN8s/+35oV39ARQIAADiWshUJAABSiWmYMgPOJ0xe7BTP/opEAgAAO0zD5dAGyz8BABiwqEhYY44EAABwLOUqEj0ZW/SkPyWgjC9FfekHAOC9aPSzP8P9+Nv+p2a3q+GJT3VGOrv8MxgMKhKJKBKJeBhhcgTMFKu1fPjhhyooKEh2GACAfqS1tVVXXHFFQr771KlTKio
qUltbm+vvys/PV0tLiwYPHuxJbKkg5RIJwzB07NgxZWVlKRAI+NJnNBpVQUGBWltb02ptb490fz/xjmkh3d9PvGNCmKapEydOKBwOKyMjcaP1p06d0unTp11/z6BBg9IqiVAqDm1kZGQkLKu8mOzs7LT9j1sD4P3EO6aFdH8/8Y6ey8nJSXgfgwcPTrsEwCtMtgQAAI6RSAAAAMdIJCSFQiGtWbNGoVAo2aEkRLq/n3jHtJDu7yfeEWkq5SZbAgCA/oOKBAAAcIxEAgAAOEYiAQAAHCORAAAAjg3YRKK6ulplZWXKysrSiBEjNHfuXB04cCDZYSXUY489pkAgoGXLliU7FE8dPXpU3/72tzVs2DANGTJEEyZM0P79+5MdlidisZhWrVqloqIiDRkyRF/5ylf08MMP9+tTBOvr6zV79myFw2EFAgFt3769189N09Tq1as1cuRIDRkyROXl5Tp06FDS4nXiQu945swZLV++XBMmTNDQoUMVDoe1YMECHTt2LKkx99XF/j1+3ve+9z0FAgHV1NT4GiP8MWATiT179igSiejtt9/Wrl27dObMGX3ta19TV1dXskNLiIaGBj399NO69tprkx2Kp/785z9r2rRpuuSSS/TrX/9a//7v/64f//jHuvzyy5MdmifWrVunDRs26Gc/+5n+8Ic/aN26dXr88cf11FNPJTs0x7q6ulRSUqLa2lrLnz/++ON68skntXHjRu3du1dDhw7VzJkzderUKd9jdepC7/jJJ5+oqalJq1atUlNTk375y1/qwIEDmjNnTlJidepi/x57bNu2TW+//bbC4bBvscFnJkzTNM2Ojg5Tkrlnz55kh+K5EydOmGPGjDF37dpl3nTTTebSpUuTHZJnli9fbt54443JDiNhZs2aZd511129rn3zm980582bl7SYvCTJ3LZtW/yzYRhmfn6++cQTT8Svffzxx2YoFDJffPHFJEXpzhff0cq+fftMSeaRI0d8i8tL53vHDz/80Bw1apT57rvvmldeeaX5k5/8JCnxIbEGbEXiizo7OyVJubm5yQ7Fc5FIRLNmzVJ5eXmyQ/HcK6+8okmTJum2227TiBEjdN111+mZZ55JdliemTp1qurq6nTw4EFJ0jvvvKM333xTFRUVyQ4tIVpaWtTW1tbr/6s5OTmaMmWK3nrrraTGlkidnZ0KBAK67LLLkh2KZwzD0Pz58/XAAw9o3LhxyQ4HCZRyh3Ylg2EYWrZsmaZNm6bx48cnOxxPvfTSS2pqalJDQ0OyQ0mIP/7xj9qwYYOqqqr0t3/7t2poaNCSJUs0aNAgVVZWJjs811asWKFoNKqxY8cqGAwqFovp0Ucf1bx585IdWkL0HNOcl5fX63peXp4nRzinolOnTmn58uX61re+lVYHea1bt06ZmZlasmRJskNBgpFInP0b+7vvvqs333wz2aF4qrW1VUuXLtWuXbvS9tQ6wzA0adIkrV27VpJ03XXX6d1339XGjRvTIpF4+eWX9cILL2jLli0aN26cmpubtWzZMoXD4bR4v4HuzJkzuv3222WapjZs2JDscDzT2Nion/70p2pqalIgEEh2OEiwAT+0sXjxYu3YsUO7d+9O2vHlidLY2KiOjg5df/31yszMVGZmpvbs2aMnn3xSmZmZisViyQ7RtZEjR6q4uLjXtWuuuUYffPBB0mLy0gMPPKAVK1bozjvv1IQJEzR//nzdf//9qq6uTnZoCZGfny9Jam9v73W9vb09/rN00ZNEHDlyRLt27UqrasQbb7yhjo4OFRYWxv/sOXLkiL7//e9r9OjRyQ4PHhuwFQnTNHXfffdp27Ztev3111VUVJTskDx388036/e//32vawsXLtTYsWO1fPlyBYPBpMXmlWnTpp2zbPfgwYO68sorkxaTlz755BNlZPTO94PBoAzDSFpMiVRUVKT8/HzV1dWptLRUkhSNRrV3717dc889yQ7PMz1JxKFDh7R7924NGzYs2SF5av78+ef
MyZo5c6bmz5+vhQsXJi0uJMaATSQikYi2bNmiX/3qV8rKyoqPv+bk5GjIkCHJDs8TWVlZ58z5GDp0qIYNG5Y2c0Huv/9+TZ06VWvXrtXtt9+uffv2adOmTdq0aVOyQ/PE7Nmz9eijj6qwsFDjxo3T7373O61fv1533XVXskNz7OTJkzp8+HD8c0tLi5qbm5Wbm6vCwkItW7ZMjzzyiMaMGaOioiKtWrVK4XBYc+fOTWrcfXGhdxw5cqRuvfVWNTU1aceOHYrFYvE/f3JzczVo0KAkRm7fxf49fjE5uuSSS5Sfn6+rr746CdEioZK9bCRZJFm25557LtmhJVS6Lf80TdP8p3/6J3P8+PFmKBQyx44da27atCnZIXkmGo2aS5cuNQsLC83BgwebX/7yl80f/vCHZnd3d7JDc2z37t2W/+1VVlaa5tkloKtWrTLz8vLMUChk3nzzzeaBAweSHXafXOgdW1pazvvnz+7du5Mdum0X+/f4RSz/TF8cIw4AABwb8JMtAQCAcyQSAADAMRIJAADgGIkEAABwjEQCAAA4RiIBAAAcI5EAAACOkUgAAADHSCQAAIBjJBIAAMAxEgkAAOAYiQQAAHDs/wM2kAk69+6pegAAAABJRU5ErkJggg==",
-      "text/plain": [
-       "<Figure size 640x480 with 2 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "ename": "NameError",
+     "evalue": "name 'mkbatch' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m batch_src, batch_labels, batch_padding_mask \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmap\u001b[39m(\u001b[38;5;28;01mlambda\u001b[39;00m x: x\u001b[38;5;241m.\u001b[39mto(device), \u001b[43mmkbatch\u001b[49m(BSZ))\n\u001b[1;32m      2\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'mkbatch' is not defined"
+     ]
     }
    ],
    "source": [
@@ -4209,7 +1276,6 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
@@ -4260,7 +1326,6 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -4280,7 +1345,6 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
@@ -4414,7 +1478,6 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
@@ -4439,7 +1502,6 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
@@ -4469,7 +1531,6 @@
   {
    "cell_type": "code",
    "execution_count": 21,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
@@ -4541,7 +1602,6 @@
   {
    "cell_type": "code",
    "execution_count": 27,
-   "execution_state": "idle",
    "metadata": {},
    "outputs": [
     {
author	SIPB	2024-12-08 17:12:01 -0500
committer	SIPB	2024-12-08 17:12:01 -0500
commit	6ae42b74e177f31b6dbcd06a1ae29be34deac8bb (patch)
tree	afac4daf9b51dc992f2152b9f792108a8d6714f8
parent	f54a040ecea0e2273e6bd06874ca4c834b4b8caf (diff)