aboutsummaryrefslogtreecommitdiff
path: root/transformer_shortest_paths.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'transformer_shortest_paths.ipynb')
-rw-r--r--transformer_shortest_paths.ipynb1445
1 files changed, 836 insertions, 609 deletions
diff --git a/transformer_shortest_paths.ipynb b/transformer_shortest_paths.ipynb
index 3949fd5..c9ff777 100644
--- a/transformer_shortest_paths.ipynb
+++ b/transformer_shortest_paths.ipynb
@@ -86,7 +86,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 3,
"execution_state": "idle",
"metadata": {
"colab": {
@@ -391,7 +391,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 8,
"execution_state": "idle",
"metadata": {
"id": "tLOWhg_CeWzH"
@@ -432,7 +432,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 9,
"execution_state": "idle",
"metadata": {
"colab": {
@@ -446,8 +446,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Training data: 1048M\n",
- "Trainable parameters in the model: 200K\n"
+ "Training data: 104M\n",
+ "Trainable parameters in the model: 200545\n"
]
}
],
@@ -455,7 +455,7 @@
"# PARAMS\n",
"VOCAB_SIZE = 1 + MAX_VTXS + 1 # pad plus max number of vertices plus target token\n",
"MODEL_DIM = 64 # Dimension of model (embedding and transformer)\n",
- "NEPOCHS = 1000\n",
+ "NEPOCHS = 100\n",
"BSZ = 2**17 # Batch size\n",
"BPE = 8 # Batches per epoch\n",
"NHEADS = 2\n",
@@ -469,9 +469,13 @@
"\n",
"trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
"print(f\"Training data: {NEPOCHS*BPE*BSZ//10**6}M\")\n",
- "print(f\"Trainable parameters in the model: {trainable_params//1000}K\")\n",
+ "print(f\"Trainable parameters in the model: {trainable_params}\")\n",
"\n",
"train_err = []\n",
+ "len1 = []\n",
+ "len2 = []\n",
+ "len3 = []\n",
+ "len15 = []\n",
"epoch = 0\n",
"\n",
"# clear loss file\n",
@@ -495,17 +499,17 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 10,
"execution_state": "idle",
"metadata": {},
"outputs": [],
"source": [
- "model = TransformerModel(input_dim=VOCAB_SIZE, model_dim=MODEL_DIM,\n",
- " output_dim=1, num_heads=NHEADS,\n",
- " num_layers=NLAYERS, seq_len=SEQ_LEN,\n",
- " dropout=DROPOUT).to(device)\n",
- "model = torch.compile(model)\n",
- "model.load_state_dict(torch.load('model.pth', weights_only=True))\n",
+ "# model = TransformerModel(input_dim=VOCAB_SIZE, model_dim=MODEL_DIM,\n",
+ "# output_dim=1, num_heads=NHEADS,\n",
+ "# num_layers=NLAYERS, seq_len=SEQ_LEN,\n",
+ "# dropout=DROPOUT).to(device)\n",
+ "# model = torch.compile(model)\n",
+ "# model.load_state_dict(torch.load('model.pth', weights_only=True))\n",
"\n",
"LR = 8e-4\n",
"WD = 0 # 1e-5\n",
@@ -526,7 +530,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"execution_state": "idle",
"metadata": {},
"outputs": [],
@@ -553,8 +557,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "execution_state": "running",
+ "execution_count": 12,
+ "execution_state": "idle",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -570,298 +574,768 @@
"text": [
"/home/sipb/.venv/lib64/python3.12/site-packages/torch/nn/functional.py:6278: UserWarning: Memory Efficient attention on Navi31 GPU is still experimental. Enable it with TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1. (Triggered internally at ../aten/src/ATen/native/transformers/hip/sdp_utils.cpp:269.)\n",
" attn_output = scaled_dot_product_attention(\n",
- "/home/sipb/.venv/lib64/python3.12/site-packages/torch/_inductor/compile_fx.py:167: UserWarning: TensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.\n",
- " warnings.warn(\n",
- "/tmp/torchinductor_sipb/nj/cnjfg6sudczhbwjig6u6ixumyik7x7ugjn4x43lbushjy4vv4pwz.py:883: UserWarning: Attempting to use hipBLASLt on an unsupported architecture! Overriding blas backend to hipblas (Triggered internally at ../aten/src/ATen/Context.cpp:296.)\n",
- " extern_kernels.mm(reinterpret_tensor(buf1, (1048576, 64), (64, 1), 0), reinterpret_tensor(primals_5, (64, 192), (1, 64), 0), out=buf2)\n"
+ "/tmp/torchinductor_sipb/bn/cbngaobakjqlwlijvkqph5lgddb2z2kzjaln3b2g2j75b6snskdn.py:859: UserWarning: Attempting to use hipBLASLt on an unsupported architecture! Overriding blas backend to hipblas (Triggered internally at ../aten/src/ATen/Context.cpp:296.)\n",
+ " extern_kernels.mm(reinterpret_tensor(buf1, (2097152, 64), (64, 1), 0), reinterpret_tensor(primals_5, (64, 192), (1, 64), 0), out=buf2)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Epoch 0/1000 \t Train Err: 85.0000\n",
- "Epoch 0/1000 \t Train Err: 72.0000\n",
- "Epoch 0/1000 \t Train Err: 63.5000\n",
- "Epoch 0/1000 \t Train Err: 58.0000\n",
- "Epoch 0/1000 \t Train Err: 53.7500\n",
- "Epoch 0/1000 \t Train Err: 51.0000\n",
- "Epoch 0/1000 \t Train Err: 49.2500\n",
- "Epoch 0/1000 \t Train Err: 48.0000\n",
- "Epoch 0/1000 \t Train Err: 47.2500\n",
- "Epoch 0/1000 \t Train Err: 46.2500\n",
- "Epoch 0/1000 \t Train Err: 45.5000\n",
- "Epoch 0/1000 \t Train Err: 45.2500\n",
- "Epoch 0/1000 \t Train Err: 44.5000\n",
- "Epoch 0/1000 \t Train Err: 44.2500\n",
- "Epoch 0/1000 \t Train Err: 44.2500\n",
- "Epoch 0/1000 \t Train Err: 44.2500\n",
- "Epoch 1/1000 \t Train Err: 43.5000\n",
- "Epoch 1/1000 \t Train Err: 43.5000\n",
- "Epoch 1/1000 \t Train Err: 43.5000\n",
- "Epoch 1/1000 \t Train Err: 43.5000\n",
- "Epoch 1/1000 \t Train Err: 43.2500\n",
- "Epoch 1/1000 \t Train Err: 43.2500\n",
- "Epoch 1/1000 \t Train Err: 43.0000\n",
- "Epoch 1/1000 \t Train Err: 43.0000\n",
- "Epoch 1/1000 \t Train Err: 42.7500\n",
- "Epoch 1/1000 \t Train Err: 42.5000\n",
- "Epoch 1/1000 \t Train Err: 42.5000\n",
- "Epoch 1/1000 \t Train Err: 42.7500\n",
- "Epoch 1/1000 \t Train Err: 42.7500\n",
- "Epoch 1/1000 \t Train Err: 42.5000\n",
- "Epoch 1/1000 \t Train Err: 42.2500\n",
- "Epoch 1/1000 \t Train Err: 42.2500\n",
- "Epoch 2/1000 \t Train Err: 42.2500\n",
- "Epoch 2/1000 \t Train Err: 42.5000\n",
- "Epoch 2/1000 \t Train Err: 42.0000\n",
- "Epoch 2/1000 \t Train Err: 42.0000\n",
- "Epoch 2/1000 \t Train Err: 42.0000\n",
- "Epoch 2/1000 \t Train Err: 42.0000\n",
- "Epoch 2/1000 \t Train Err: 42.0000\n",
- "Epoch 2/1000 \t Train Err: 42.2500\n",
- "Epoch 2/1000 \t Train Err: 41.7500\n",
- "Epoch 2/1000 \t Train Err: 41.7500\n",
- "Epoch 2/1000 \t Train Err: 41.2500\n",
- "Epoch 2/1000 \t Train Err: 41.5000\n",
- "Epoch 2/1000 \t Train Err: 41.5000\n",
- "Epoch 2/1000 \t Train Err: 41.7500\n",
- "Epoch 2/1000 \t Train Err: 41.2500\n",
- "Epoch 2/1000 \t Train Err: 41.5000\n",
- "Epoch 3/1000 \t Train Err: 41.5000\n",
- "Epoch 3/1000 \t Train Err: 41.2500\n",
- "Epoch 3/1000 \t Train Err: 41.5000\n",
- "Epoch 3/1000 \t Train Err: 41.2500\n",
- "Epoch 3/1000 \t Train Err: 41.2500\n",
- "Epoch 3/1000 \t Train Err: 41.0000\n",
- "Epoch 3/1000 \t Train Err: 41.0000\n",
- "Epoch 3/1000 \t Train Err: 40.7500\n",
- "Epoch 3/1000 \t Train Err: 40.7500\n",
- "Epoch 3/1000 \t Train Err: 40.5000\n",
- "Epoch 3/1000 \t Train Err: 40.5000\n",
- "Epoch 3/1000 \t Train Err: 40.2500\n",
- "Epoch 3/1000 \t Train Err: 40.0000\n",
- "Epoch 3/1000 \t Train Err: 39.7500\n",
- "Epoch 3/1000 \t Train Err: 39.2500\n",
- "Epoch 3/1000 \t Train Err: 38.7500\n",
- "Epoch 4/1000 \t Train Err: 38.0000\n",
- "Epoch 4/1000 \t Train Err: 37.2500\n",
- "Epoch 4/1000 \t Train Err: 36.5000\n",
- "Epoch 4/1000 \t Train Err: 35.5000\n",
- "Epoch 4/1000 \t Train Err: 35.0000\n",
- "Epoch 4/1000 \t Train Err: 34.7500\n",
- "Epoch 4/1000 \t Train Err: 34.7500\n",
- "Epoch 4/1000 \t Train Err: 34.7500\n",
- "Epoch 4/1000 \t Train Err: 34.5000\n",
- "Epoch 4/1000 \t Train Err: 34.2500\n",
- "Epoch 4/1000 \t Train Err: 33.7500\n",
- "Epoch 4/1000 \t Train Err: 33.7500\n",
- "Epoch 4/1000 \t Train Err: 33.5000\n",
- "Epoch 4/1000 \t Train Err: 33.5000\n",
- "Epoch 4/1000 \t Train Err: 33.0000\n",
- "Epoch 4/1000 \t Train Err: 33.0000\n",
- "Epoch 5/1000 \t Train Err: 33.0000\n",
- "Epoch 5/1000 \t Train Err: 32.7500\n",
- "Epoch 5/1000 \t Train Err: 32.7500\n",
- "Epoch 5/1000 \t Train Err: 32.7500\n",
- "Epoch 5/1000 \t Train Err: 32.5000\n",
- "Epoch 5/1000 \t Train Err: 32.0000\n",
- "Epoch 5/1000 \t Train Err: 32.5000\n",
- "Epoch 5/1000 \t Train Err: 32.2500\n",
- "Epoch 5/1000 \t Train Err: 32.5000\n",
- "Epoch 5/1000 \t Train Err: 31.8750\n",
- "Epoch 5/1000 \t Train Err: 31.6250\n",
- "Epoch 5/1000 \t Train Err: 31.6250\n",
- "Epoch 5/1000 \t Train Err: 31.6250\n",
- "Epoch 5/1000 \t Train Err: 31.8750\n",
- "Epoch 5/1000 \t Train Err: 31.5000\n",
- "Epoch 5/1000 \t Train Err: 31.2500\n",
- "Epoch 6/1000 \t Train Err: 31.1250\n",
- "Epoch 6/1000 \t Train Err: 31.1250\n",
- "Epoch 6/1000 \t Train Err: 31.2500\n",
- "Epoch 6/1000 \t Train Err: 31.2500\n",
- "Epoch 6/1000 \t Train Err: 31.0000\n",
- "Epoch 6/1000 \t Train Err: 30.8750\n",
- "Epoch 6/1000 \t Train Err: 31.0000\n",
- "Epoch 6/1000 \t Train Err: 30.8750\n",
- "Epoch 6/1000 \t Train Err: 30.8750\n",
- "Epoch 6/1000 \t Train Err: 30.8750\n",
- "Epoch 6/1000 \t Train Err: 30.7500\n",
- "Epoch 6/1000 \t Train Err: 30.6250\n",
- "Epoch 6/1000 \t Train Err: 30.5000\n",
- "Epoch 6/1000 \t Train Err: 30.7500\n",
- "Epoch 6/1000 \t Train Err: 30.3750\n",
- "Epoch 6/1000 \t Train Err: 30.5000\n",
- "Epoch 7/1000 \t Train Err: 30.6250\n",
- "Epoch 7/1000 \t Train Err: 30.5000\n",
- "Epoch 7/1000 \t Train Err: 30.3750\n",
- "Epoch 7/1000 \t Train Err: 30.5000\n",
- "Epoch 7/1000 \t Train Err: 30.5000\n",
- "Epoch 7/1000 \t Train Err: 30.5000\n",
- "Epoch 7/1000 \t Train Err: 30.3750\n",
- "Epoch 7/1000 \t Train Err: 30.2500\n",
- "Epoch 7/1000 \t Train Err: 30.2500\n",
- "Epoch 7/1000 \t Train Err: 30.2500\n",
- "Epoch 7/1000 \t Train Err: 30.1250\n",
- "Epoch 7/1000 \t Train Err: 30.0000\n",
- "Epoch 7/1000 \t Train Err: 30.2500\n",
- "Epoch 7/1000 \t Train Err: 30.1250\n",
- "Epoch 7/1000 \t Train Err: 30.1250\n",
- "Epoch 7/1000 \t Train Err: 30.0000\n",
- "Epoch 8/1000 \t Train Err: 30.0000\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 30.0000\n",
- "Epoch 8/1000 \t Train Err: 30.0000\n",
- "Epoch 8/1000 \t Train Err: 29.7500\n",
- "Epoch 8/1000 \t Train Err: 30.0000\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 29.6250\n",
- "Epoch 8/1000 \t Train Err: 29.6250\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 29.5000\n",
- "Epoch 8/1000 \t Train Err: 29.8750\n",
- "Epoch 8/1000 \t Train Err: 29.6250\n",
- "Epoch 9/1000 \t Train Err: 29.7500\n",
- "Epoch 9/1000 \t Train Err: 29.7500\n",
- "Epoch 9/1000 \t Train Err: 29.5000\n",
- "Epoch 9/1000 \t Train Err: 29.6250\n",
- "Epoch 9/1000 \t Train Err: 29.6250\n",
- "Epoch 9/1000 \t Train Err: 29.6250\n",
- "Epoch 9/1000 \t Train Err: 29.6250\n",
- "Epoch 9/1000 \t Train Err: 29.6250\n",
- "Epoch 9/1000 \t Train Err: 29.5000\n",
- "Epoch 9/1000 \t Train Err: 29.3750\n",
- "Epoch 9/1000 \t Train Err: 29.5000\n",
- "Epoch 9/1000 \t Train Err: 29.5000\n",
- "Epoch 9/1000 \t Train Err: 29.5000\n",
- "Epoch 9/1000 \t Train Err: 29.3750\n",
- "Epoch 9/1000 \t Train Err: 29.5000\n",
- "Epoch 9/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.3750\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.5000\n",
- "Epoch 10/1000 \t Train Err: 29.3750\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.3750\n",
- "Epoch 10/1000 \t Train Err: 29.3750\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.2500\n",
- "Epoch 10/1000 \t Train Err: 29.1250\n",
- "Epoch 11/1000 \t Train Err: 29.2500\n",
- "Epoch 11/1000 \t Train Err: 29.2500\n",
- "Epoch 11/1000 \t Train Err: 29.2500\n",
- "Epoch 11/1000 \t Train Err: 29.1250\n",
- "Epoch 11/1000 \t Train Err: 29.0000\n",
- "Epoch 11/1000 \t Train Err: 29.2500\n",
- "Epoch 11/1000 \t Train Err: 29.1250\n",
- "Epoch 11/1000 \t Train Err: 29.0000\n",
- "Epoch 11/1000 \t Train Err: 29.0000\n",
- "Epoch 11/1000 \t Train Err: 29.0000\n",
- "Epoch 11/1000 \t Train Err: 29.0000\n",
- "Epoch 11/1000 \t Train Err: 29.1250\n",
- "Epoch 11/1000 \t Train Err: 29.1250\n",
- "Epoch 11/1000 \t Train Err: 29.2500\n",
- "Epoch 11/1000 \t Train Err: 29.1250\n",
- "Epoch 11/1000 \t Train Err: 29.1250\n",
- "Epoch 12/1000 \t Train Err: 29.1250\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 28.8750\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 29.1250\n",
- "Epoch 12/1000 \t Train Err: 28.8750\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 29.0000\n",
- "Epoch 12/1000 \t Train Err: 28.8750\n",
- "Epoch 12/1000 \t Train Err: 28.7500\n",
- "Epoch 12/1000 \t Train Err: 28.8750\n",
- "Epoch 12/1000 \t Train Err: 28.8750\n",
- "Epoch 12/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 29.0000\n",
- "Epoch 13/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 29.1250\n",
- "Epoch 13/1000 \t Train Err: 29.0000\n",
- "Epoch 13/1000 \t Train Err: 29.0000\n",
- "Epoch 13/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 29.0000\n",
- "Epoch 13/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 28.7500\n",
- "Epoch 13/1000 \t Train Err: 28.6250\n",
- "Epoch 13/1000 \t Train Err: 28.6250\n",
- "Epoch 13/1000 \t Train Err: 28.8750\n",
- "Epoch 13/1000 \t Train Err: 28.6250\n",
- "Epoch 13/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.8750\n",
- "Epoch 14/1000 \t Train Err: 28.5000\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.8750\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.8750\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.8750\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 14/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 15/1000 \t Train Err: 28.7500\n",
- "Epoch 15/1000 \t Train Err: 28.5000\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 15/1000 \t Train Err: 28.5000\n",
- "Epoch 15/1000 \t Train Err: 28.6250\n",
- "Epoch 16/1000 \t Train Err: 28.3750\n",
- "Epoch 16/1000 \t Train Err: 28.2500\n",
- "Epoch 16/1000 \t Train Err: 28.1250\n",
- "Epoch 16/1000 \t Train Err: 27.8750\n",
- "Epoch 16/1000 \t Train Err: 28.0000\n",
- "Epoch 16/1000 \t Train Err: 27.6250\n",
- "Epoch 16/1000 \t Train Err: 27.5000\n",
- "Epoch 16/1000 \t Train Err: 27.2500\n",
- "Epoch 16/1000 \t Train Err: 27.1250\n",
- "Epoch 16/1000 \t Train Err: 27.0000\n",
- "Epoch 16/1000 \t Train Err: 26.5000\n",
- "Epoch 16/1000 \t Train Err: 27.0000\n",
- "Epoch 16/1000 \t Train Err: 26.5000\n",
- "Epoch 16/1000 \t Train Err: 26.3750\n",
- "Epoch 16/1000 \t Train Err: 25.6250\n",
- "Epoch 16/1000 \t Train Err: 25.8750\n",
- "Epoch 17/1000 \t Train Err: 25.2500\n",
- "Epoch 17/1000 \t Train Err: 25.1250\n",
- "Epoch 17/1000 \t Train Err: 24.8750\n",
- "Epoch 17/1000 \t Train Err: 24.7500\n",
- "Epoch 17/1000 \t Train Err: 24.1250\n",
- "Epoch 17/1000 \t Train Err: 23.8750\n",
- "Epoch 17/1000 \t Train Err: 23.7500\n",
- "Epoch 17/1000 \t Train Err: 23.5000\n",
- "Epoch 17/1000 \t Train Err: 23.1250\n",
- "Epoch 17/1000 \t Train Err: 22.8750\n"
+ "Epoch 0/100 \t Train Err: 87.5000 0.62109375 3.28125 8.125 222.0\n",
+ "Epoch 0/100 \t Train Err: 70.5000 0.5078125 0.173828125 1.953125 182.0\n",
+ "Epoch 0/100 \t Train Err: 59.7500 2.828125 0.4140625 0.134765625 154.0\n",
+ "Epoch 0/100 \t Train Err: 54.0000 5.5 1.734375 0.1279296875 137.0\n",
+ "Epoch 0/100 \t Train Err: 50.7500 7.9375 3.21875 0.6953125 126.0\n",
+ "Epoch 0/100 \t Train Err: 48.5000 10.0 4.625 1.40625 118.0\n",
+ "Epoch 0/100 \t Train Err: 46.7500 11.75 5.84375 2.109375 111.5\n",
+ "Epoch 0/100 \t Train Err: 45.7500 13.125 6.90625 2.75 107.5\n",
+ "Epoch 1/100 \t Train Err: 44.7500 14.25 7.75 3.28125 104.0\n",
+ "Epoch 1/100 \t Train Err: 44.5000 15.1875 8.4375 3.71875 102.0\n",
+ "Epoch 1/100 \t Train Err: 44.2500 15.875 9.0 4.09375 100.0\n",
+ "Epoch 1/100 \t Train Err: 43.7500 16.5 9.4375 4.34375 98.5\n",
+ "Epoch 1/100 \t Train Err: 43.7500 16.875 9.8125 4.59375 97.5\n",
+ "Epoch 1/100 \t Train Err: 43.5000 17.25 10.1875 4.8125 96.5\n",
+ "Epoch 1/100 \t Train Err: 43.2500 17.625 10.4375 5.0 95.0\n",
+ "Epoch 1/100 \t Train Err: 43.2500 18.0 10.6875 5.1875 95.0\n",
+ "Epoch 2/100 \t Train Err: 43.0000 18.5 11.0 5.34375 94.0\n",
+ "Epoch 2/100 \t Train Err: 42.5000 18.75 11.25 5.5625 92.5\n",
+ "Epoch 2/100 \t Train Err: 42.7500 19.125 11.5625 5.75 92.5\n",
+ "Epoch 2/100 \t Train Err: 42.5000 19.5 11.8125 5.9375 91.5\n",
+ "Epoch 2/100 \t Train Err: 42.0000 19.875 12.1875 6.1875 90.0\n",
+ "Epoch 2/100 \t Train Err: 42.2500 20.25 12.5 6.40625 90.0\n",
+ "Epoch 2/100 \t Train Err: 42.0000 20.625 12.6875 6.59375 89.0\n",
+ "Epoch 2/100 \t Train Err: 41.7500 21.0 13.0625 6.84375 88.0\n",
+ "Epoch 3/100 \t Train Err: 42.2500 21.375 13.375 7.0625 88.0\n",
+ "Epoch 3/100 \t Train Err: 41.7500 21.75 13.6875 7.28125 86.0\n",
+ "Epoch 3/100 \t Train Err: 41.5000 22.125 14.0 7.5625 85.5\n",
+ "Epoch 3/100 \t Train Err: 41.7500 22.5 14.3125 7.75 85.5\n",
+ "Epoch 3/100 \t Train Err: 41.2500 22.875 14.5625 7.9375 84.5\n",
+ "Epoch 3/100 \t Train Err: 41.2500 23.25 14.875 8.1875 83.5\n",
+ "Epoch 3/100 \t Train Err: 41.5000 23.5 15.1875 8.4375 83.5\n",
+ "Epoch 3/100 \t Train Err: 41.2500 23.75 15.4375 8.625 82.0\n",
+ "Epoch 4/100 \t Train Err: 41.0000 24.125 15.75 8.8125 81.0\n",
+ "Epoch 4/100 \t Train Err: 40.7500 24.375 16.0 9.0625 81.0\n",
+ "Epoch 4/100 \t Train Err: 40.7500 24.5 16.25 9.25 80.5\n",
+ "Epoch 4/100 \t Train Err: 40.7500 24.625 16.5 9.4375 79.5\n",
+ "Epoch 4/100 \t Train Err: 40.5000 24.75 16.75 9.625 79.0\n",
+ "Epoch 4/100 \t Train Err: 40.5000 24.625 16.875 9.75 79.0\n",
+ "Epoch 4/100 \t Train Err: 40.2500 24.375 17.125 9.875 78.5\n",
+ "Epoch 4/100 \t Train Err: 40.0000 23.75 17.125 10.0 78.0\n",
+ "Epoch 5/100 \t Train Err: 39.7500 23.0 17.125 10.0625 77.5\n",
+ "Epoch 5/100 \t Train Err: 39.5000 21.5 17.0 10.0 78.0\n",
+ "Epoch 5/100 \t Train Err: 38.7500 19.375 16.75 9.875 78.0\n",
+ "Epoch 5/100 \t Train Err: 38.5000 16.5 16.25 9.6875 78.5\n",
+ "Epoch 5/100 \t Train Err: 37.5000 12.9375 15.625 9.375 79.0\n",
+ "Epoch 5/100 \t Train Err: 36.5000 8.875 14.9375 9.125 80.0\n",
+ "Epoch 5/100 \t Train Err: 35.5000 5.09375 14.6875 9.25 79.5\n",
+ "Epoch 5/100 \t Train Err: 34.5000 2.390625 15.5 10.0 78.0\n",
+ "Epoch 6/100 \t Train Err: 33.5000 0.9140625 17.5 11.3125 75.0\n",
+ "Epoch 6/100 \t Train Err: 33.0000 0.38671875 19.875 12.4375 72.5\n",
+ "Epoch 6/100 \t Train Err: 32.7500 0.4921875 21.0 12.9375 71.5\n",
+ "Epoch 6/100 \t Train Err: 33.0000 0.85546875 21.375 13.0 71.0\n",
+ "Epoch 6/100 \t Train Err: 33.0000 1.1328125 21.5 13.125 70.5\n",
+ "Epoch 6/100 \t Train Err: 32.7500 1.1875 21.875 13.4375 69.5\n",
+ "Epoch 6/100 \t Train Err: 32.5000 1.0234375 22.5 13.9375 68.5\n",
+ "Epoch 6/100 \t Train Err: 32.2500 0.73828125 23.125 14.5 67.5\n",
+ "Epoch 7/100 \t Train Err: 31.8750 0.451171875 23.875 15.0625 66.0\n",
+ "Epoch 7/100 \t Train Err: 31.6250 0.251953125 24.625 15.625 64.5\n",
+ "Epoch 7/100 \t Train Err: 31.5000 0.2060546875 25.25 16.125 63.75\n",
+ "Epoch 7/100 \t Train Err: 31.2500 0.2734375 25.625 16.5 63.0\n",
+ "Epoch 7/100 \t Train Err: 31.1250 0.37109375 26.125 17.0 62.25\n",
+ "Epoch 7/100 \t Train Err: 30.8750 0.400390625 26.625 17.25 61.5\n",
+ "Epoch 7/100 \t Train Err: 30.8750 0.353515625 26.875 17.5 61.0\n",
+ "Epoch 7/100 \t Train Err: 30.7500 0.275390625 27.25 17.75 60.5\n",
+ "Epoch 8/100 \t Train Err: 30.6250 0.18359375 27.625 18.125 59.75\n",
+ "Epoch 8/100 \t Train Err: 30.5000 0.10986328125 28.125 18.625 59.0\n",
+ "Epoch 8/100 \t Train Err: 30.3750 0.06640625 28.625 19.0 58.5\n",
+ "Epoch 8/100 \t Train Err: 30.3750 0.04931640625 29.125 19.375 57.75\n",
+ "Epoch 8/100 \t Train Err: 30.1250 0.048583984375 29.75 19.875 57.0\n",
+ "Epoch 8/100 \t Train Err: 30.0000 0.054443359375 30.25 20.25 56.0\n",
+ "Epoch 8/100 \t Train Err: 29.8750 0.0576171875 30.875 20.875 55.25\n",
+ "Epoch 8/100 \t Train Err: 29.8750 0.056884765625 31.5 21.25 54.5\n",
+ "Epoch 9/100 \t Train Err: 29.7500 0.051025390625 32.0 21.75 53.75\n",
+ "Epoch 9/100 \t Train Err: 29.5000 0.04296875 32.75 22.25 53.0\n",
+ "Epoch 9/100 \t Train Err: 29.5000 0.03369140625 33.0 22.625 52.25\n",
+ "Epoch 9/100 \t Train Err: 29.5000 0.0260009765625 33.75 23.125 51.75\n",
+ "Epoch 9/100 \t Train Err: 29.3750 0.02197265625 34.25 23.5 51.25\n",
+ "Epoch 9/100 \t Train Err: 29.3750 0.0216064453125 35.0 24.125 50.25\n",
+ "Epoch 9/100 \t Train Err: 29.2500 0.0238037109375 35.25 24.375 50.0\n",
+ "Epoch 9/100 \t Train Err: 29.1250 0.02734375 35.75 24.75 49.5\n",
+ "Epoch 10/100 \t Train Err: 29.1250 0.0301513671875 36.0 25.0 49.0\n",
+ "Epoch 10/100 \t Train Err: 29.1250 0.032470703125 36.75 25.625 48.25\n",
+ "Epoch 10/100 \t Train Err: 29.0000 0.03271484375 37.25 26.125 47.5\n",
+ "Epoch 10/100 \t Train Err: 28.8750 0.03125 37.5 26.25 47.25\n",
+ "Epoch 10/100 \t Train Err: 29.0000 0.027587890625 38.0 26.75 46.5\n",
+ "Epoch 10/100 \t Train Err: 28.8750 0.023193359375 38.25 26.875 46.5\n",
+ "Epoch 10/100 \t Train Err: 28.8750 0.0196533203125 38.25 26.875 46.5\n",
+ "Epoch 10/100 \t Train Err: 28.7500 0.0172119140625 38.75 27.375 45.75\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.0166015625 39.0 27.5 45.5\n",
+ "Epoch 11/100 \t Train Err: 28.8750 0.0169677734375 39.0 27.5 45.5\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.0172119140625 39.0 27.5 45.5\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.017578125 39.75 28.25 44.75\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.017578125 39.75 28.25 44.75\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.017333984375 39.75 28.25 44.75\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.016845703125 39.75 28.25 44.75\n",
+ "Epoch 11/100 \t Train Err: 28.7500 0.016357421875 39.75 28.25 44.75\n",
+ "Epoch 12/100 \t Train Err: 28.7500 0.015869140625 40.0 28.5 44.25\n",
+ "Epoch 12/100 \t Train Err: 28.7500 0.01513671875 40.75 28.875 44.0\n",
+ "Epoch 12/100 \t Train Err: 28.7500 0.01483154296875 40.75 28.875 44.0\n",
+ "Epoch 12/100 \t Train Err: 28.8750 0.01416015625 40.75 28.875 44.0\n",
+ "Epoch 12/100 \t Train Err: 28.7500 0.0140380859375 40.75 28.875 44.0\n",
+ "Epoch 12/100 \t Train Err: 28.7500 0.01397705078125 40.75 28.875 44.0\n",
+ "Epoch 12/100 \t Train Err: 28.7500 0.0140380859375 40.75 28.875 44.0\n",
+ "Epoch 12/100 \t Train Err: 28.6250 0.01422119140625 40.75 29.0 43.75\n",
+ "Epoch 13/100 \t Train Err: 28.6250 0.01422119140625 41.0 29.375 43.25\n",
+ "Epoch 13/100 \t Train Err: 28.7500 0.01416015625 41.5 29.5 43.0\n",
+ "Epoch 13/100 \t Train Err: 28.7500 0.0142822265625 41.5 29.625 43.0\n",
+ "Epoch 13/100 \t Train Err: 28.6250 0.01446533203125 41.5 29.625 43.0\n",
+ "Epoch 13/100 \t Train Err: 28.6250 0.01422119140625 41.5 29.625 43.0\n",
+ "Epoch 13/100 \t Train Err: 28.6250 0.013916015625 41.5 29.625 43.0\n",
+ "Epoch 13/100 \t Train Err: 28.6250 0.01373291015625 41.5 29.625 43.0\n",
+ "Epoch 13/100 \t Train Err: 28.6250 0.0135498046875 41.5 29.625 43.0\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.01318359375 41.5 29.625 43.0\n",
+ "Epoch 14/100 \t Train Err: 28.5000 0.012939453125 41.5 29.625 42.75\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.01275634765625 41.75 29.875 42.5\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.012451171875 42.0 30.125 42.5\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.01220703125 42.25 30.25 42.25\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.01226806640625 42.25 30.25 42.25\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.01190185546875 42.25 30.25 42.25\n",
+ "Epoch 14/100 \t Train Err: 28.6250 0.01190185546875 42.25 30.25 42.25\n",
+ "Epoch 15/100 \t Train Err: 28.7500 0.0118408203125 42.25 30.25 42.25\n",
+ "Epoch 15/100 \t Train Err: 28.6250 0.0115966796875 42.25 30.25 42.25\n",
+ "Epoch 15/100 \t Train Err: 28.6250 0.0115966796875 42.25 30.25 42.25\n",
+ "Epoch 15/100 \t Train Err: 28.6250 0.01141357421875 42.25 30.25 42.25\n",
+ "Epoch 15/100 \t Train Err: 28.6250 0.011474609375 42.25 30.25 42.0\n",
+ "Epoch 15/100 \t Train Err: 28.6250 0.01123046875 42.25 30.375 42.0\n",
+ "Epoch 15/100 \t Train Err: 28.5000 0.0111083984375 42.5 30.625 41.75\n",
+ "Epoch 15/100 \t Train Err: 28.6250 0.010986328125 42.5 30.75 41.75\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.01104736328125 42.75 30.875 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.01092529296875 42.75 30.875 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.0107421875 43.0 31.0 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.0107421875 43.0 31.0 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.01068115234375 43.0 31.0 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.01043701171875 43.0 31.0 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.0103759765625 43.0 31.0 41.5\n",
+ "Epoch 16/100 \t Train Err: 28.6250 0.01025390625 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.0101318359375 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.0098876953125 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.00982666015625 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.009765625 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.00958251953125 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.00946044921875 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.5000 0.0093994140625 43.0 31.0 41.5\n",
+ "Epoch 17/100 \t Train Err: 28.6250 0.0091552734375 43.0 31.0 41.5\n",
+ "Epoch 18/100 \t Train Err: 28.6250 0.00897216796875 43.0 31.0 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.5000 0.0089111328125 43.0 31.0 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.3750 0.00885009765625 43.0 31.0 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.3750 0.0087890625 43.0 31.125 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.6250 0.0086669921875 43.0 31.125 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.5000 0.008544921875 43.0 31.125 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.5000 0.00836181640625 43.0 31.125 41.25\n",
+ "Epoch 18/100 \t Train Err: 28.5000 0.0081787109375 43.0 31.125 41.25\n",
+ "Epoch 19/100 \t Train Err: 28.3750 0.0079345703125 43.0 31.125 41.25\n",
+ "Epoch 19/100 \t Train Err: 28.5000 0.0078125 43.0 31.125 41.25\n",
+ "Epoch 19/100 \t Train Err: 28.5000 0.007781982421875 43.0 31.0 41.25\n",
+ "Epoch 19/100 \t Train Err: 28.5000 0.00750732421875 43.0 31.0 41.25\n",
+ "Epoch 19/100 \t Train Err: 28.5000 0.00738525390625 42.75 30.875 41.25\n",
+ "Epoch 19/100 \t Train Err: 28.5000 0.00714111328125 42.5 30.75 41.5\n",
+ "Epoch 19/100 \t Train Err: 28.3750 0.006866455078125 42.25 30.5 41.5\n",
+ "Epoch 19/100 \t Train Err: 28.3750 0.0067138671875 41.75 30.125 42.0\n",
+ "Epoch 20/100 \t Train Err: 28.2500 0.006591796875 40.5 29.25 42.5\n",
+ "Epoch 20/100 \t Train Err: 28.1250 0.00634765625 37.5 27.125 44.5\n",
+ "Epoch 20/100 \t Train Err: 27.8750 0.0067138671875 27.75 19.875 52.0\n",
+ "Epoch 20/100 \t Train Err: 27.8750 0.0040283203125 25.875 18.5 53.5\n",
+ "Epoch 20/100 \t Train Err: 27.7500 0.011962890625 34.0 24.5 46.5\n",
+ "Epoch 20/100 \t Train Err: 27.8750 0.0240478515625 36.5 26.125 44.75\n",
+ "Epoch 20/100 \t Train Err: 27.6250 0.0267333984375 35.5 25.5 45.0\n",
+ "Epoch 20/100 \t Train Err: 27.2500 0.016357421875 30.125 21.5 48.5\n",
+ "Epoch 21/100 \t Train Err: 27.5000 0.005279541015625 19.5 13.5 57.5\n",
+ "Epoch 21/100 \t Train Err: 26.8750 0.00982666015625 28.875 20.875 48.25\n",
+ "Epoch 21/100 \t Train Err: 26.7500 0.01019287109375 32.5 23.875 45.0\n",
+ "Epoch 21/100 \t Train Err: 26.5000 0.0057373046875 27.75 20.625 47.5\n",
+ "Epoch 21/100 \t Train Err: 26.5000 0.0111083984375 14.0 10.375 58.5\n",
+ "Epoch 21/100 \t Train Err: 25.7500 0.007110595703125 27.625 21.875 45.0\n",
+ "Epoch 21/100 \t Train Err: 25.3750 0.0081787109375 27.625 22.25 44.25\n",
+ "Epoch 21/100 \t Train Err: 24.7500 0.0101318359375 11.4375 9.5 55.5\n",
+ "Epoch 22/100 \t Train Err: 23.7500 0.0091552734375 14.8125 12.625 50.0\n",
+ "Epoch 22/100 \t Train Err: 23.3750 0.0196533203125 18.5 16.5 45.5\n",
+ "Epoch 22/100 \t Train Err: 22.8750 0.0205078125 9.5625 8.25 52.0\n",
+ "Epoch 22/100 \t Train Err: 22.3750 0.045654296875 9.1875 7.90625 50.75\n",
+ "Epoch 22/100 \t Train Err: 22.2500 0.1318359375 15.375 13.8125 45.0\n",
+ "Epoch 22/100 \t Train Err: 21.6250 0.150390625 11.4375 9.5625 47.25\n",
+ "Epoch 22/100 \t Train Err: 21.3750 0.126953125 8.4375 6.34375 49.75\n",
+ "Epoch 22/100 \t Train Err: 20.8750 0.1455078125 11.0625 8.75 46.0\n",
+ "Epoch 23/100 \t Train Err: 20.6250 0.125 13.6875 11.4375 43.0\n",
+ "Epoch 23/100 \t Train Err: 20.3750 0.04931640625 11.625 9.625 44.0\n",
+ "Epoch 23/100 \t Train Err: 20.0000 0.033935546875 9.3125 7.6875 45.25\n",
+ "Epoch 23/100 \t Train Err: 19.6250 0.07275390625 10.0625 8.875 43.25\n",
+ "Epoch 23/100 \t Train Err: 19.5000 0.1181640625 11.5625 10.9375 41.0\n",
+ "Epoch 23/100 \t Train Err: 19.0000 0.1787109375 11.0 11.1875 40.25\n",
+ "Epoch 23/100 \t Train Err: 18.7500 0.25 8.1875 8.9375 41.75\n",
+ "Epoch 23/100 \t Train Err: 18.5000 0.2216796875 8.1875 9.9375 40.25\n",
+ "Epoch 24/100 \t Train Err: 18.1250 0.1513671875 10.0625 13.4375 37.0\n",
+ "Epoch 24/100 \t Train Err: 17.6250 0.12890625 7.6875 11.6875 37.75\n",
+ "Epoch 24/100 \t Train Err: 17.3750 0.1201171875 6.28125 10.875 38.0\n",
+ "Epoch 24/100 \t Train Err: 17.0000 0.126953125 7.53125 14.5625 35.0\n",
+ "Epoch 24/100 \t Train Err: 16.7500 0.11181640625 7.3125 15.6875 33.75\n",
+ "Epoch 24/100 \t Train Err: 16.5000 0.08203125 4.75 13.3125 35.5\n",
+ "Epoch 24/100 \t Train Err: 16.3750 0.068359375 5.75 17.125 32.75\n",
+ "Epoch 24/100 \t Train Err: 15.9375 0.057861328125 6.34375 19.25 30.75\n",
+ "Epoch 25/100 \t Train Err: 15.7500 0.051025390625 3.578125 14.5 33.5\n",
+ "Epoch 25/100 \t Train Err: 15.2500 0.04248046875 5.0 18.625 29.625\n",
+ "Epoch 25/100 \t Train Err: 15.0000 0.040771484375 5.53125 21.125 27.875\n",
+ "Epoch 25/100 \t Train Err: 14.8125 0.033935546875 3.171875 16.0 30.375\n",
+ "Epoch 25/100 \t Train Err: 14.6250 0.0322265625 3.734375 18.875 28.5\n",
+ "Epoch 25/100 \t Train Err: 14.3750 0.03369140625 5.09375 23.0 25.5\n",
+ "Epoch 25/100 \t Train Err: 14.1250 0.028076171875 2.046875 14.3125 30.125\n",
+ "Epoch 25/100 \t Train Err: 13.8125 0.023681640625 3.234375 19.375 26.625\n",
+ "Epoch 26/100 \t Train Err: 13.6875 0.023681640625 4.75 24.875 23.125\n",
+ "Epoch 26/100 \t Train Err: 13.5625 0.0245361328125 1.515625 13.6875 29.0\n",
+ "Epoch 26/100 \t Train Err: 13.0625 0.0179443359375 2.875 20.5 24.25\n",
+ "Epoch 26/100 \t Train Err: 13.0000 0.016845703125 3.5 24.0 22.25\n",
+ "Epoch 26/100 \t Train Err: 12.8750 0.02197265625 1.46875 15.625 26.5\n",
+ "Epoch 26/100 \t Train Err: 12.5000 0.0174560546875 2.03125 19.5 23.5\n",
+ "Epoch 26/100 \t Train Err: 12.4375 0.014404296875 3.0 24.75 20.625\n",
+ "Epoch 26/100 \t Train Err: 12.1250 0.0230712890625 1.46875 17.625 23.875\n",
+ "Epoch 27/100 \t Train Err: 11.9375 0.022705078125 1.421875 17.75 23.125\n",
+ "Epoch 27/100 \t Train Err: 11.7500 0.0150146484375 2.09375 22.625 20.0\n",
+ "Epoch 27/100 \t Train Err: 11.6250 0.01531982421875 1.6796875 20.875 20.75\n",
+ "Epoch 27/100 \t Train Err: 11.3750 0.0177001953125 1.0546875 17.25 22.0\n",
+ "Epoch 27/100 \t Train Err: 11.0625 0.0128173828125 1.359375 20.375 19.875\n",
+ "Epoch 27/100 \t Train Err: 11.0000 0.0128173828125 1.5078125 22.0 18.875\n",
+ "Epoch 27/100 \t Train Err: 10.8125 0.01190185546875 1.03125 18.125 20.125\n",
+ "Epoch 27/100 \t Train Err: 10.7500 0.01165771484375 0.99609375 18.125 20.25\n",
+ "Epoch 28/100 \t Train Err: 10.5625 0.012451171875 1.328125 21.125 18.125\n",
+ "Epoch 28/100 \t Train Err: 10.3750 0.01104736328125 1.15625 19.375 18.625\n",
+ "Epoch 28/100 \t Train Err: 10.3125 0.01025390625 0.953125 17.25 19.5\n",
+ "Epoch 28/100 \t Train Err: 10.1250 0.010498046875 1.171875 19.875 17.75\n",
+ "Epoch 28/100 \t Train Err: 10.0625 0.0101318359375 1.109375 20.0 17.5\n",
+ "Epoch 28/100 \t Train Err: 10.0000 0.0111083984375 0.7578125 16.75 18.875\n",
+ "Epoch 28/100 \t Train Err: 9.8125 0.0093994140625 0.87109375 18.375 17.5\n",
+ "Epoch 28/100 \t Train Err: 9.7500 0.01043701171875 1.0390625 20.625 16.375\n",
+ "Epoch 29/100 \t Train Err: 9.4375 0.00921630859375 0.828125 18.5 16.75\n",
+ "Epoch 29/100 \t Train Err: 9.5000 0.00836181640625 0.59375 16.5 17.75\n",
+ "Epoch 29/100 \t Train Err: 9.4375 0.0115966796875 0.796875 20.375 15.8125\n",
+ "Epoch 29/100 \t Train Err: 9.3125 0.010986328125 0.72265625 20.25 15.625\n",
+ "Epoch 29/100 \t Train Err: 9.1875 0.00762939453125 0.51953125 16.75 16.875\n",
+ "Epoch 29/100 \t Train Err: 9.0625 0.00799560546875 0.56640625 18.375 15.8125\n",
+ "Epoch 29/100 \t Train Err: 9.0625 0.00946044921875 0.66796875 20.625 14.625\n",
+ "Epoch 29/100 \t Train Err: 9.0000 0.00665283203125 0.46484375 16.125 16.5\n",
+ "Epoch 30/100 \t Train Err: 8.8750 0.008056640625 0.5234375 18.25 15.3125\n",
+ "Epoch 30/100 \t Train Err: 8.7500 0.0111083984375 0.59375 20.0 14.1875\n",
+ "Epoch 36/100 \t Train Err: 7.3750 0.00799560546875 0.302734375 13.5 12.9375\n",
+ "Epoch 36/100 \t Train Err: 7.2188 0.00799560546875 0.369140625 15.5625 11.375\n",
+ "Epoch 36/100 \t Train Err: 7.2188 0.00823974609375 0.4296875 17.375 10.375\n",
+ "Epoch 36/100 \t Train Err: 7.2500 0.00860595703125 0.412109375 18.0 10.125\n",
+ "Epoch 36/100 \t Train Err: 7.1875 0.01171875 0.33984375 15.625 11.1875\n",
+ "Epoch 36/100 \t Train Err: 7.0625 0.0177001953125 0.2890625 12.875 12.1875\n",
+ "Epoch 36/100 \t Train Err: 7.1562 0.01806640625 0.271484375 11.8125 13.0\n",
+ "Epoch 36/100 \t Train Err: 7.1875 0.0120849609375 0.24609375 11.5625 13.0625\n",
+ "Epoch 37/100 \t Train Err: 7.0625 0.007171630859375 0.2431640625 12.375 12.3125\n",
+ "Epoch 37/100 \t Train Err: 7.0625 0.0101318359375 0.2490234375 14.0 11.5\n",
+ "Epoch 37/100 \t Train Err: 7.0625 0.0181884765625 0.28125 15.4375 10.875\n",
+ "Epoch 37/100 \t Train Err: 7.0938 0.0244140625 0.287109375 15.8125 10.6875\n",
+ "Epoch 37/100 \t Train Err: 6.9375 0.0230712890625 0.27734375 15.1875 10.625\n",
+ "Epoch 37/100 \t Train Err: 6.8750 0.01556396484375 0.255859375 13.5625 11.4375\n",
+ "Epoch 37/100 \t Train Err: 6.9375 0.0091552734375 0.220703125 12.4375 12.0625\n",
+ "Epoch 37/100 \t Train Err: 6.9375 0.006011962890625 0.2158203125 12.4375 12.0625\n",
+ "Epoch 38/100 \t Train Err: 6.8438 0.004791259765625 0.232421875 12.9375 11.625\n",
+ "Epoch 38/100 \t Train Err: 6.8750 0.004486083984375 0.2421875 14.3125 10.875\n",
+ "Epoch 38/100 \t Train Err: 6.8438 0.00433349609375 0.28125 15.0625 10.1875\n",
+ "Epoch 38/100 \t Train Err: 6.9375 0.004241943359375 0.25 14.9375 10.4375\n",
+ "Epoch 38/100 \t Train Err: 6.7500 0.004241943359375 0.23828125 13.75 10.625\n",
+ "Epoch 38/100 \t Train Err: 6.7812 0.0042724609375 0.2109375 12.25 11.5625\n",
+ "Epoch 38/100 \t Train Err: 6.8438 0.003692626953125 0.1943359375 11.9375 11.875\n",
+ "Epoch 38/100 \t Train Err: 6.6875 0.004119873046875 0.197265625 11.5 11.5\n",
+ "Epoch 39/100 \t Train Err: 6.6562 0.007720947265625 0.193359375 12.1875 11.0625\n",
+ "Epoch 39/100 \t Train Err: 6.6250 0.01318359375 0.2080078125 13.25 10.4375\n",
+ "Epoch 39/100 \t Train Err: 6.6562 0.016357421875 0.224609375 13.9375 10.3125\n",
+ "Epoch 39/100 \t Train Err: 6.6562 0.0159912109375 0.2021484375 13.75 10.375\n",
+ "Epoch 39/100 \t Train Err: 6.5312 0.0126953125 0.19140625 12.9375 10.5\n",
+ "Epoch 39/100 \t Train Err: 6.5938 0.0081787109375 0.1796875 11.9375 11.0625\n",
+ "Epoch 39/100 \t Train Err: 6.6250 0.005401611328125 0.1796875 11.875 11.375\n",
+ "Epoch 39/100 \t Train Err: 6.5000 0.0040283203125 0.1787109375 12.125 10.9375\n",
+ "Epoch 40/100 \t Train Err: 6.5312 0.0031890869140625 0.1962890625 12.8125 10.5625\n",
+ "Epoch 40/100 \t Train Err: 6.5625 0.0029296875 0.2080078125 13.25 10.3125\n",
+ "Epoch 40/100 \t Train Err: 6.5625 0.0026702880859375 0.189453125 13.5 10.25\n",
+ "Epoch 40/100 \t Train Err: 6.5312 0.002685546875 0.177734375 12.5625 10.4375\n",
+ "Epoch 40/100 \t Train Err: 6.4375 0.0027008056640625 0.169921875 11.625 10.8125\n",
+ "Epoch 40/100 \t Train Err: 6.5000 0.0026092529296875 0.1630859375 11.6875 11.0625\n",
+ "Epoch 40/100 \t Train Err: 6.5000 0.0030670166015625 0.162109375 11.9375 10.875\n",
+ "Epoch 40/100 \t Train Err: 6.5000 0.004486083984375 0.1630859375 12.4375 10.5625\n",
+ "Epoch 41/100 \t Train Err: 6.4375 0.006011962890625 0.1875 13.3125 9.9375\n",
+ "Epoch 41/100 \t Train Err: 6.4688 0.005706787109375 0.1708984375 12.75 10.25\n",
+ "Epoch 41/100 \t Train Err: 6.4688 0.00445556640625 0.15234375 12.25 10.625\n",
+ "Epoch 41/100 \t Train Err: 6.4688 0.0032501220703125 0.166015625 11.8125 10.875\n",
+ "Epoch 41/100 \t Train Err: 6.3750 0.0027008056640625 0.166015625 12.0625 10.5\n",
+ "Epoch 41/100 \t Train Err: 6.3125 0.0023040771484375 0.158203125 12.0 10.25\n",
+ "Epoch 41/100 \t Train Err: 6.4062 0.002227783203125 0.1640625 12.4375 10.125\n",
+ "Epoch 41/100 \t Train Err: 6.3438 0.002227783203125 0.171875 12.6875 9.9375\n",
+ "Epoch 42/100 \t Train Err: 6.3125 0.002197265625 0.1591796875 12.0625 10.1875\n",
+ "Epoch 42/100 \t Train Err: 6.2500 0.0021209716796875 0.1513671875 11.4375 10.3125\n",
+ "Epoch 42/100 \t Train Err: 6.2812 0.0022430419921875 0.1396484375 11.5 10.5\n",
+ "Epoch 42/100 \t Train Err: 6.1875 0.002838134765625 0.146484375 11.8125 9.9375\n",
+ "Epoch 42/100 \t Train Err: 6.3125 0.0037078857421875 0.150390625 12.125 10.0625\n",
+ "Epoch 42/100 \t Train Err: 6.2812 0.004425048828125 0.1591796875 12.375 9.875\n",
+ "Epoch 42/100 \t Train Err: 6.2188 0.004150390625 0.1357421875 11.625 10.0625\n",
+ "Epoch 42/100 \t Train Err: 6.2188 0.0035858154296875 0.1416015625 11.4375 10.25\n",
+ "Epoch 43/100 \t Train Err: 6.2500 0.0028839111328125 0.1328125 11.1875 10.4375\n",
+ "Epoch 43/100 \t Train Err: 6.1562 0.0025482177734375 0.13671875 11.1875 10.125\n",
+ "Epoch 43/100 \t Train Err: 6.0938 0.002227783203125 0.142578125 11.625 9.75\n",
+ "Epoch 43/100 \t Train Err: 6.1875 0.002105712890625 0.1435546875 12.0625 9.8125\n",
+ "Epoch 43/100 \t Train Err: 6.2812 0.001983642578125 0.150390625 12.125 9.9375\n",
+ "Epoch 43/100 \t Train Err: 6.0938 0.0019683837890625 0.1396484375 11.5 9.8125\n",
+ "Epoch 43/100 \t Train Err: 6.2188 0.00191497802734375 0.1337890625 11.5 10.125\n",
+ "Epoch 43/100 \t Train Err: 6.0938 0.00201416015625 0.1337890625 11.4375 9.875\n",
+ "Epoch 44/100 \t Train Err: 6.0938 0.0023040771484375 0.140625 11.375 9.9375\n",
+ "Epoch 44/100 \t Train Err: 6.0938 0.002960205078125 0.1298828125 11.125 10.0\n",
+ "Epoch 44/100 \t Train Err: 6.1562 0.003662109375 0.1357421875 11.375 10.0\n",
+ "Epoch 44/100 \t Train Err: 6.0625 0.003997802734375 0.130859375 11.4375 9.75\n",
+ "Epoch 44/100 \t Train Err: 6.0312 0.003997802734375 0.134765625 11.4375 9.5625\n",
+ "Epoch 44/100 \t Train Err: 6.0000 0.003265380859375 0.1337890625 11.4375 9.625\n",
+ "Epoch 44/100 \t Train Err: 6.0000 0.0024871826171875 0.1337890625 11.5 9.625\n",
+ "Epoch 44/100 \t Train Err: 6.0312 0.0020904541015625 0.1376953125 11.0625 9.8125\n",
+ "Epoch 45/100 \t Train Err: 5.9688 0.0020294189453125 0.125 10.8125 9.6875\n",
+ "Epoch 45/100 \t Train Err: 6.0000 0.0019683837890625 0.1318359375 10.75 9.75\n",
+ "Epoch 45/100 \t Train Err: 6.0312 0.002044677734375 0.12890625 11.0625 9.6875\n",
+ "Epoch 45/100 \t Train Err: 5.9688 0.002197265625 0.12353515625 11.0625 9.6875\n",
+ "Epoch 45/100 \t Train Err: 5.8750 0.0026397705078125 0.1318359375 11.3125 9.375\n",
+ "Epoch 45/100 \t Train Err: 5.9375 0.003204345703125 0.1201171875 11.25 9.4375\n",
+ "Epoch 45/100 \t Train Err: 5.8125 0.003326416015625 0.115234375 11.0 9.375\n",
+ "Epoch 45/100 \t Train Err: 5.9062 0.0030975341796875 0.111328125 11.0 9.5625\n",
+ "Epoch 46/100 \t Train Err: 5.9062 0.0026702880859375 0.10498046875 10.5 9.75\n",
+ "Epoch 46/100 \t Train Err: 5.8125 0.0024566650390625 0.1044921875 10.375 9.625\n",
+ "Epoch 46/100 \t Train Err: 5.8438 0.0024566650390625 0.11474609375 10.875 9.5\n",
+ "Epoch 46/100 \t Train Err: 5.8438 0.0023956298828125 0.11962890625 11.375 9.1875\n",
+ "Epoch 46/100 \t Train Err: 5.7812 0.0023651123046875 0.12060546875 11.125 9.125\n",
+ "Epoch 46/100 \t Train Err: 5.9062 0.0023193359375 0.11767578125 10.875 9.5625\n",
+ "Epoch 46/100 \t Train Err: 5.7500 0.002349853515625 0.09912109375 10.25 9.5625\n",
+ "Epoch 46/100 \t Train Err: 5.7812 0.0024871826171875 0.10986328125 9.9375 9.8125\n",
+ "Epoch 47/100 \t Train Err: 5.7812 0.002960205078125 0.107421875 10.5 9.375\n",
+ "Epoch 47/100 \t Train Err: 5.7812 0.0032501220703125 0.1123046875 10.875 9.1875\n",
+ "Epoch 47/100 \t Train Err: 5.7188 0.0033111572265625 0.11767578125 10.5625 9.125\n",
+ "Epoch 47/100 \t Train Err: 5.7812 0.0030517578125 0.10986328125 10.4375 9.5\n",
+ "Epoch 47/100 \t Train Err: 5.6562 0.002899169921875 0.1181640625 10.625 9.0\n",
+ "Epoch 47/100 \t Train Err: 5.6562 0.0026702880859375 0.12109375 11.0 8.875\n",
+ "Epoch 47/100 \t Train Err: 5.7812 0.00262451171875 0.10302734375 10.4375 9.375\n",
+ "Epoch 47/100 \t Train Err: 5.7812 0.0026702880859375 0.1015625 9.8125 9.75\n",
+ "Epoch 48/100 \t Train Err: 5.7188 0.002655029296875 0.09814453125 9.9375 9.4375\n",
+ "Epoch 48/100 \t Train Err: 5.6562 0.003143310546875 0.111328125 10.75 8.875\n",
+ "Epoch 48/100 \t Train Err: 5.5625 0.00335693359375 0.111328125 10.6875 8.75\n",
+ "Epoch 48/100 \t Train Err: 5.5625 0.003326416015625 0.1044921875 9.8125 9.1875\n",
+ "Epoch 48/100 \t Train Err: 5.6562 0.003265380859375 0.099609375 10.125 9.25\n",
+ "Epoch 48/100 \t Train Err: 5.6875 0.0030670166015625 0.10888671875 10.875 8.875\n",
+ "Epoch 48/100 \t Train Err: 5.5938 0.0027923583984375 0.09619140625 10.25 8.9375\n",
+ "Epoch 48/100 \t Train Err: 5.5938 0.0027313232421875 0.10400390625 9.4375 9.4375\n",
+ "Epoch 49/100 \t Train Err: 5.5312 0.002777099609375 0.09326171875 10.0 8.9375\n",
+ "Epoch 49/100 \t Train Err: 5.5938 0.0031890869140625 0.1015625 10.9375 8.5\n",
+ "Epoch 49/100 \t Train Err: 5.5000 0.00341796875 0.08984375 10.25 8.8125\n",
+ "Epoch 49/100 \t Train Err: 5.5938 0.003662109375 0.07666015625 8.8125 9.75\n",
+ "Epoch 49/100 \t Train Err: 5.5312 0.004547119140625 0.09375 9.6875 9.0625\n",
+ "Epoch 49/100 \t Train Err: 5.5000 0.004638671875 0.1103515625 11.125 8.125\n",
+ "Epoch 49/100 \t Train Err: 5.4375 0.0031890869140625 0.08447265625 8.875 9.3125\n",
+ "Epoch 49/100 \t Train Err: 5.5312 0.0031890869140625 0.0908203125 9.3125 9.375\n",
+ "Epoch 50/100 \t Train Err: 5.5625 0.003265380859375 0.10693359375 11.75 7.625\n",
+ "Epoch 50/100 \t Train Err: 5.5000 0.00341796875 0.07763671875 7.5625 10.1875\n",
+ "Epoch 50/100 \t Train Err: 5.3750 0.00323486328125 0.08056640625 8.8125 9.125\n",
+ "Epoch 50/100 \t Train Err: 5.4688 0.00433349609375 0.11767578125 11.875 7.46875\n",
+ "Epoch 50/100 \t Train Err: 5.5312 0.00433349609375 0.09130859375 8.625 9.75\n",
+ "Epoch 50/100 \t Train Err: 5.4062 0.0047607421875 0.087890625 9.3125 9.0\n",
+ "Epoch 50/100 \t Train Err: 5.4375 0.00457763671875 0.11767578125 11.875 7.4375\n",
+ "Epoch 50/100 \t Train Err: 5.5312 0.003387451171875 0.06640625 6.75 10.8125\n",
+ "Epoch 51/100 \t Train Err: 5.4062 0.003448486328125 0.08984375 9.5 8.75\n",
+ "Epoch 51/100 \t Train Err: 5.4062 0.003662109375 0.11279296875 12.0 7.40625\n",
+ "Epoch 51/100 \t Train Err: 5.3125 0.003692626953125 0.087890625 8.4375 9.1875\n",
+ "Epoch 51/100 \t Train Err: 5.3750 0.003692626953125 0.08349609375 8.5625 9.3125\n",
+ "Epoch 51/100 \t Train Err: 5.3125 0.0037994384765625 0.09765625 10.8125 7.9375\n",
+ "Epoch 51/100 \t Train Err: 5.3750 0.0036773681640625 0.0791015625 9.6875 8.4375\n",
+ "Epoch 51/100 \t Train Err: 5.3125 0.0037078857421875 0.06787109375 8.375 9.25\n",
+ "Epoch 51/100 \t Train Err: 5.2500 0.003936767578125 0.076171875 9.25 8.625\n",
+ "Epoch 52/100 \t Train Err: 5.3125 0.00433349609375 0.08349609375 10.375 8.0\n",
+ "Epoch 52/100 \t Train Err: 5.1875 0.00457763671875 0.0732421875 8.8125 8.6875\n",
+ "Epoch 52/100 \t Train Err: 5.2188 0.0047607421875 0.06787109375 8.25 8.9375\n",
+ "Epoch 52/100 \t Train Err: 5.2812 0.00518798828125 0.08056640625 9.5 8.3125\n",
+ "Epoch 52/100 \t Train Err: 5.0938 0.0047607421875 0.08154296875 9.5 7.9375\n",
+ "Epoch 52/100 \t Train Err: 5.2188 0.00396728515625 0.06591796875 8.0625 9.1875\n",
+ "Epoch 52/100 \t Train Err: 5.1250 0.004180908203125 0.07421875 9.5 8.0\n",
+ "Epoch 52/100 \t Train Err: 5.1250 0.004150390625 0.078125 9.5 8.0625\n",
+ "Epoch 53/100 \t Train Err: 5.1562 0.004180908203125 0.064453125 8.0 8.875\n",
+ "Epoch 53/100 \t Train Err: 5.0312 0.004608154296875 0.0703125 8.3125 8.4375\n",
+ "Epoch 53/100 \t Train Err: 5.0625 0.00531005859375 0.07861328125 8.625 8.1875\n",
+ "Epoch 53/100 \t Train Err: 5.0312 0.005340576171875 0.07763671875 9.0 8.0625\n",
+ "Epoch 53/100 \t Train Err: 5.0312 0.004791259765625 0.07421875 8.4375 8.25\n",
+ "Epoch 53/100 \t Train Err: 5.0312 0.00445556640625 0.0673828125 8.3125 8.375\n",
+ "Epoch 53/100 \t Train Err: 4.9688 0.004486083984375 0.06591796875 8.625 8.0\n",
+ "Epoch 53/100 \t Train Err: 5.0312 0.004486083984375 0.06396484375 8.25 8.4375\n",
+ "Epoch 54/100 \t Train Err: 4.9688 0.004425048828125 0.06689453125 8.3125 8.3125\n",
+ "Epoch 54/100 \t Train Err: 5.0000 0.00457763671875 0.07470703125 8.75 7.96875\n",
+ "Epoch 54/100 \t Train Err: 4.9375 0.004669189453125 0.07080078125 7.96875 8.25\n",
+ "Epoch 54/100 \t Train Err: 4.8750 0.004852294921875 0.07275390625 7.90625 8.1875\n",
+ "Epoch 54/100 \t Train Err: 4.9062 0.00494384765625 0.0791015625 8.5625 7.8125\n",
+ "Epoch 54/100 \t Train Err: 4.9688 0.0045166015625 0.0732421875 7.90625 8.3125\n",
+ "Epoch 54/100 \t Train Err: 4.9375 0.0045166015625 0.06689453125 7.625 8.4375\n",
+ "Epoch 54/100 \t Train Err: 4.8438 0.004608154296875 0.0791015625 8.625 7.5\n",
+ "Epoch 55/100 \t Train Err: 4.9062 0.0045166015625 0.0693359375 7.3125 8.5625\n",
+ "Epoch 55/100 \t Train Err: 4.9062 0.00494384765625 0.07958984375 8.3125 7.875\n",
+ "Epoch 55/100 \t Train Err: 4.8750 0.00531005859375 0.0849609375 8.6875 7.6875\n",
+ "Epoch 55/100 \t Train Err: 4.7812 0.004638671875 0.06494140625 6.8125 8.5625\n",
+ "Epoch 55/100 \t Train Err: 4.8438 0.004791259765625 0.08203125 9.0 7.1875\n",
+ "Epoch 55/100 \t Train Err: 4.9688 0.004241943359375 0.055419921875 5.40625 10.0\n",
+ "Epoch 55/100 \t Train Err: 6.1562 0.005401611328125 0.19140625 20.125 4.1875\n",
+ "Epoch 55/100 \t Train Err: 14.0000 0.005706787109375 0.107421875 0.53515625 36.0\n",
+ "Epoch 56/100 \t Train Err: 11.1875 0.314453125 0.1689453125 0.56640625 29.0\n",
+ "Epoch 56/100 \t Train Err: 7.6250 1.75 3.984375 17.0 9.1875\n",
+ "Epoch 56/100 \t Train Err: 12.3125 2.03125 11.1875 47.25 3.21875\n",
+ "Epoch 56/100 \t Train Err: 7.0625 0.88671875 1.78125 10.1875 12.5625\n",
+ "Epoch 56/100 \t Train Err: 8.7500 0.05078125 0.1953125 2.484375 21.875\n",
+ "Epoch 56/100 \t Train Err: 8.8750 0.1328125 0.0419921875 2.5625 22.0\n",
+ "Epoch 56/100 \t Train Err: 6.6562 0.32421875 0.0888671875 9.6875 12.8125\n",
+ "Epoch 56/100 \t Train Err: 7.5000 0.349609375 0.51171875 28.375 5.90625\n",
+ "Epoch 57/100 \t Train Err: 8.4375 0.337890625 0.66796875 35.75 4.53125\n",
+ "Epoch 57/100 \t Train Err: 7.1562 0.3125 0.265625 25.375 6.375\n",
+ "Epoch 57/100 \t Train Err: 6.3750 0.259765625 0.095703125 10.75 11.5\n",
+ "Epoch 57/100 \t Train Err: 7.0625 0.125 0.08837890625 5.75 15.75\n",
+ "Epoch 57/100 \t Train Err: 7.1562 0.022705078125 0.06005859375 5.75 16.0\n",
+ "Epoch 57/100 \t Train Err: 6.5625 0.1787109375 0.2412109375 8.625 13.0625\n",
+ "Epoch 57/100 \t Train Err: 6.4375 0.443359375 0.59765625 14.5 9.375\n",
+ "Epoch 57/100 \t Train Err: 6.5625 0.408203125 0.6484375 18.875 7.34375\n",
+ "Epoch 58/100 \t Train Err: 6.4062 0.150390625 0.337890625 19.25 6.9375\n",
+ "Epoch 58/100 \t Train Err: 6.1875 0.0218505859375 0.11865234375 16.5 7.65625\n",
+ "Epoch 58/100 \t Train Err: 5.9688 0.1796875 0.30078125 11.4375 9.4375\n",
+ "Epoch 58/100 \t Train Err: 6.1562 0.35546875 0.5703125 8.625 11.0625\n",
+ "Epoch 58/100 \t Train Err: 6.0312 0.31640625 0.55859375 8.125 11.375\n",
+ "Epoch 58/100 \t Train Err: 5.8125 0.130859375 0.333984375 9.375 10.125\n",
+ "Epoch 58/100 \t Train Err: 5.5938 0.01422119140625 0.138671875 12.125 8.4375\n",
+ "Epoch 58/100 \t Train Err: 5.6562 0.06884765625 0.1396484375 14.4375 7.28125\n",
+ "Epoch 59/100 \t Train Err: 5.6875 0.1884765625 0.20703125 14.5625 7.28125\n",
+ "Epoch 59/100 \t Train Err: 5.6562 0.24609375 0.216796875 12.9375 7.96875\n",
+ "Epoch 59/100 \t Train Err: 5.4688 0.2109375 0.162109375 10.3125 9.0\n",
+ "Epoch 59/100 \t Train Err: 5.4688 0.134765625 0.10888671875 8.6875 9.9375\n",
+ "Epoch 59/100 \t Train Err: 5.4375 0.06689453125 0.0966796875 8.125 10.25\n",
+ "Epoch 59/100 \t Train Err: 5.2812 0.0262451171875 0.11279296875 8.3125 9.6875\n",
+ "Epoch 59/100 \t Train Err: 5.2812 0.0106201171875 0.1416015625 10.375 8.5\n",
+ "Epoch 59/100 \t Train Err: 5.3125 0.0084228515625 0.177734375 12.5 7.46875\n",
+ "Epoch 60/100 \t Train Err: 5.3125 0.0126953125 0.1875 12.1875 7.28125\n",
+ "Epoch 60/100 \t Train Err: 5.2188 0.019775390625 0.1982421875 11.1875 7.5625\n",
+ "Epoch 60/100 \t Train Err: 5.1250 0.0240478515625 0.203125 9.3125 8.375\n",
+ "Epoch 60/100 \t Train Err: 5.1250 0.019775390625 0.1875 8.25 9.1875\n",
+ "Epoch 60/100 \t Train Err: 5.1250 0.010986328125 0.1572265625 7.71875 9.1875\n",
+ "Epoch 60/100 \t Train Err: 5.0312 0.007171630859375 0.1259765625 8.375 8.625\n",
+ "Epoch 60/100 \t Train Err: 4.9688 0.0150146484375 0.10400390625 9.125 8.0\n",
+ "Epoch 60/100 \t Train Err: 4.9375 0.0308837890625 0.09033203125 9.625 7.625\n",
+ "Epoch 61/100 \t Train Err: 4.9688 0.046630859375 0.08447265625 9.5625 7.65625\n",
+ "Epoch 61/100 \t Train Err: 4.9688 0.0546875 0.08154296875 9.125 7.875\n",
+ "Epoch 61/100 \t Train Err: 4.9688 0.054443359375 0.0712890625 8.1875 8.5\n",
+ "Epoch 61/100 \t Train Err: 4.9062 0.049072265625 0.07275390625 7.875 8.4375\n",
+ "Epoch 61/100 \t Train Err: 4.8125 0.040771484375 0.0693359375 7.71875 8.25\n",
+ "Epoch 61/100 \t Train Err: 4.8750 0.031494140625 0.06884765625 7.9375 8.4375\n",
+ "Epoch 61/100 \t Train Err: 4.8125 0.0228271484375 0.072265625 8.5625 7.8125\n",
+ "Epoch 61/100 \t Train Err: 4.8125 0.01611328125 0.07568359375 9.0 7.59375\n",
+ "Epoch 62/100 \t Train Err: 4.7500 0.010986328125 0.07861328125 8.5625 7.625\n",
+ "Epoch 62/100 \t Train Err: 4.6875 0.0079345703125 0.08203125 7.9375 7.625\n",
+ "Epoch 62/100 \t Train Err: 4.7500 0.00665283203125 0.0810546875 7.5 8.3125\n",
+ "Epoch 62/100 \t Train Err: 4.7188 0.00634765625 0.07861328125 7.09375 8.3125\n",
+ "Epoch 62/100 \t Train Err: 4.6562 0.006317138671875 0.08349609375 7.40625 8.0\n",
+ "Epoch 62/100 \t Train Err: 4.6562 0.006500244140625 0.0791015625 7.9375 7.625\n",
+ "Epoch 62/100 \t Train Err: 4.7188 0.006500244140625 0.0732421875 8.1875 7.6875\n",
+ "Epoch 62/100 \t Train Err: 4.6875 0.00677490234375 0.0771484375 8.5 7.53125\n",
+ "Epoch 63/100 \t Train Err: 4.6875 0.00732421875 0.07421875 7.9375 7.78125\n",
+ "Epoch 63/100 \t Train Err: 4.6562 0.00836181640625 0.0703125 7.84375 7.8125\n",
+ "Epoch 63/100 \t Train Err: 4.6875 0.0093994140625 0.0654296875 7.65625 7.90625\n",
+ "Epoch 63/100 \t Train Err: 4.6875 0.010498046875 0.0654296875 7.75 7.96875\n",
+ "Epoch 63/100 \t Train Err: 4.6250 0.01116943359375 0.0634765625 7.6875 7.71875\n",
+ "Epoch 63/100 \t Train Err: 4.6562 0.01153564453125 0.064453125 7.75 7.78125\n",
+ "Epoch 63/100 \t Train Err: 4.6562 0.0118408203125 0.060302734375 7.875 7.78125\n",
+ "Epoch 63/100 \t Train Err: 4.5938 0.01171875 0.0634765625 7.625 7.71875\n",
+ "Epoch 64/100 \t Train Err: 4.6562 0.010986328125 0.060791015625 7.53125 7.90625\n",
+ "Epoch 64/100 \t Train Err: 4.5312 0.01019287109375 0.05908203125 7.15625 7.6875\n",
+ "Epoch 64/100 \t Train Err: 4.5312 0.009033203125 0.064453125 7.28125 7.75\n",
+ "Epoch 64/100 \t Train Err: 4.5625 0.00836181640625 0.06298828125 7.5 7.75\n",
+ "Epoch 64/100 \t Train Err: 4.5625 0.007598876953125 0.059326171875 7.5625 7.6875\n",
+ "Epoch 64/100 \t Train Err: 4.5938 0.007110595703125 0.0654296875 7.5625 7.75\n",
+ "Epoch 64/100 \t Train Err: 4.5312 0.00689697265625 0.06396484375 7.53125 7.625\n",
+ "Epoch 64/100 \t Train Err: 4.5625 0.006591796875 0.06494140625 7.59375 7.59375\n",
+ "Epoch 65/100 \t Train Err: 4.5000 0.0064697265625 0.061767578125 7.40625 7.625\n",
+ "Epoch 65/100 \t Train Err: 4.5938 0.006439208984375 0.061767578125 7.625 7.71875\n",
+ "Epoch 65/100 \t Train Err: 4.5625 0.006500244140625 0.0615234375 7.5 7.6875\n",
+ "Epoch 65/100 \t Train Err: 4.5625 0.0064697265625 0.055908203125 7.25 7.75\n",
+ "Epoch 65/100 \t Train Err: 4.5625 0.00640869140625 0.056640625 7.34375 7.75\n",
+ "Epoch 65/100 \t Train Err: 4.4688 0.0064697265625 0.0625 7.375 7.5625\n",
+ "Epoch 65/100 \t Train Err: 4.4688 0.00640869140625 0.060302734375 7.34375 7.53125\n",
+ "Epoch 65/100 \t Train Err: 4.4375 0.00640869140625 0.059814453125 7.03125 7.6875\n",
+ "Epoch 66/100 \t Train Err: 4.5000 0.00628662109375 0.06005859375 6.96875 7.71875\n",
+ "Epoch 66/100 \t Train Err: 4.5312 0.006622314453125 0.058349609375 7.125 7.71875\n",
+ "Epoch 66/100 \t Train Err: 4.4688 0.006500244140625 0.05908203125 7.34375 7.53125\n",
+ "Epoch 66/100 \t Train Err: 4.5000 0.006683349609375 0.06298828125 7.46875 7.4375\n",
+ "Epoch 66/100 \t Train Err: 4.3750 0.006744384765625 0.057861328125 7.0625 7.3125\n",
+ "Epoch 66/100 \t Train Err: 4.4688 0.006683349609375 0.060791015625 7.125 7.5625\n",
+ "Epoch 66/100 \t Train Err: 4.5000 0.006744384765625 0.060302734375 7.25 7.65625\n",
+ "Epoch 66/100 \t Train Err: 4.5000 0.006744384765625 0.056884765625 7.125 7.625\n",
+ "Epoch 67/100 \t Train Err: 4.4688 0.006683349609375 0.05712890625 6.96875 7.65625\n",
+ "Epoch 67/100 \t Train Err: 4.3750 0.006683349609375 0.060546875 6.875 7.4375\n",
+ "Epoch 67/100 \t Train Err: 4.3438 0.0067138671875 0.06298828125 7.21875 7.25\n",
+ "Epoch 67/100 \t Train Err: 4.4688 0.00665283203125 0.06201171875 7.25 7.5625\n",
+ "Epoch 67/100 \t Train Err: 4.4375 0.00628662109375 0.057861328125 6.96875 7.5625\n",
+ "Epoch 67/100 \t Train Err: 4.2500 0.00640869140625 0.055419921875 6.53125 7.4375\n",
+ "Epoch 67/100 \t Train Err: 4.3750 0.0062255859375 0.051513671875 6.96875 7.46875\n",
+ "Epoch 67/100 \t Train Err: 4.4062 0.006195068359375 0.055419921875 6.96875 7.46875\n",
+ "Epoch 68/100 \t Train Err: 4.3438 0.00604248046875 0.052734375 6.71875 7.34375\n",
+ "Epoch 68/100 \t Train Err: 4.4062 0.005950927734375 0.055419921875 6.65625 7.59375\n",
+ "Epoch 68/100 \t Train Err: 4.3750 0.005859375 0.0546875 6.90625 7.34375\n",
+ "Epoch 68/100 \t Train Err: 4.3125 0.005950927734375 0.057373046875 6.6875 7.375\n",
+ "Epoch 68/100 \t Train Err: 4.3750 0.005889892578125 0.05517578125 6.53125 7.625\n",
+ "Epoch 68/100 \t Train Err: 4.3438 0.00592041015625 0.053955078125 6.53125 7.65625\n",
+ "Epoch 68/100 \t Train Err: 4.3438 0.005889892578125 0.056640625 6.65625 7.34375\n",
+ "Epoch 68/100 \t Train Err: 4.3125 0.005950927734375 0.05419921875 7.21875 7.25\n",
+ "Epoch 69/100 \t Train Err: 4.3438 0.005859375 0.057373046875 6.9375 7.25\n",
+ "Epoch 69/100 \t Train Err: 4.4062 0.00592041015625 0.0576171875 6.875 7.4375\n",
+ "Epoch 69/100 \t Train Err: 4.3125 0.00592041015625 0.0556640625 6.65625 7.375\n",
+ "Epoch 69/100 \t Train Err: 4.3438 0.00592041015625 0.0556640625 7.03125 7.40625\n",
+ "Epoch 69/100 \t Train Err: 4.2812 0.005889892578125 0.058837890625 6.53125 7.34375\n",
+ "Epoch 69/100 \t Train Err: 4.2500 0.00579833984375 0.05419921875 6.46875 7.25\n",
+ "Epoch 69/100 \t Train Err: 4.3125 0.005828857421875 0.051513671875 6.625 7.34375\n",
+ "Epoch 69/100 \t Train Err: 4.3125 0.005859375 0.052978515625 6.78125 7.21875\n",
+ "Epoch 70/100 \t Train Err: 4.2812 0.005859375 0.04931640625 6.625 7.25\n",
+ "Epoch 70/100 \t Train Err: 4.1875 0.005767822265625 0.051025390625 6.21875 7.1875\n",
+ "Epoch 70/100 \t Train Err: 4.2500 0.005828857421875 0.05126953125 6.34375 7.4375\n",
+ "Epoch 70/100 \t Train Err: 4.2812 0.005706787109375 0.05126953125 6.28125 7.5\n",
+ "Epoch 70/100 \t Train Err: 4.3438 0.0057373046875 0.05078125 6.40625 7.5\n",
+ "Epoch 70/100 \t Train Err: 4.3125 0.0057373046875 0.05517578125 6.875 7.1875\n",
+ "Epoch 70/100 \t Train Err: 4.1562 0.005828857421875 0.052734375 6.78125 6.875\n",
+ "Epoch 70/100 \t Train Err: 4.2188 0.00579833984375 0.05419921875 6.59375 7.15625\n",
+ "Epoch 71/100 \t Train Err: 4.3125 0.00567626953125 0.052490234375 6.5 7.46875\n",
+ "Epoch 71/100 \t Train Err: 4.2500 0.00567626953125 0.05126953125 6.125 7.5\n",
+ "Epoch 71/100 \t Train Err: 4.2500 0.005706787109375 0.05224609375 6.5 7.4375\n",
+ "Epoch 71/100 \t Train Err: 4.1875 0.0057373046875 0.053466796875 6.65625 6.96875\n",
+ "Epoch 71/100 \t Train Err: 4.2812 0.005767822265625 0.0546875 6.875 7.09375\n",
+ "Epoch 71/100 \t Train Err: 4.1562 0.005645751953125 0.054443359375 6.40625 6.96875\n",
+ "Epoch 71/100 \t Train Err: 4.2188 0.005615234375 0.0498046875 6.3125 7.4375\n",
+ "Epoch 71/100 \t Train Err: 4.2500 0.00555419921875 0.05224609375 6.40625 7.40625\n",
+ "Epoch 72/100 \t Train Err: 4.2500 0.005615234375 0.052978515625 6.15625 7.375\n",
+ "Epoch 72/100 \t Train Err: 4.2500 0.00537109375 0.0517578125 6.5 7.25\n",
+ "Epoch 72/100 \t Train Err: 4.1562 0.00543212890625 0.05029296875 6.4375 7.0\n",
+ "Epoch 72/100 \t Train Err: 4.1875 0.005401611328125 0.04638671875 6.3125 7.09375\n",
+ "Epoch 72/100 \t Train Err: 4.1562 0.00537109375 0.048828125 6.34375 7.09375\n",
+ "Epoch 72/100 \t Train Err: 4.2500 0.00537109375 0.0498046875 6.21875 7.3125\n",
+ "Epoch 72/100 \t Train Err: 4.2188 0.00531005859375 0.05078125 6.46875 7.21875\n",
+ "Epoch 72/100 \t Train Err: 4.1250 0.00537109375 0.05078125 6.46875 6.90625\n",
+ "Epoch 73/100 \t Train Err: 4.2500 0.005340576171875 0.053955078125 6.40625 7.1875\n",
+ "Epoch 73/100 \t Train Err: 4.1875 0.005279541015625 0.0517578125 6.59375 6.96875\n",
+ "Epoch 73/100 \t Train Err: 4.2500 0.00537109375 0.0556640625 6.40625 7.21875\n",
+ "Epoch 73/100 \t Train Err: 4.1250 0.00531005859375 0.051513671875 6.34375 6.96875\n",
+ "Epoch 73/100 \t Train Err: 4.2188 0.005279541015625 0.04833984375 6.0 7.46875\n",
+ "Epoch 73/100 \t Train Err: 4.1250 0.00537109375 0.050537109375 6.0625 7.1875\n",
+ "Epoch 73/100 \t Train Err: 4.1250 0.00531005859375 0.048095703125 5.90625 7.09375\n",
+ "Epoch 73/100 \t Train Err: 4.1562 0.0052490234375 0.05029296875 6.375 6.90625\n",
+ "Epoch 74/100 \t Train Err: 4.0625 0.005523681640625 0.049560546875 6.4375 6.8125\n",
+ "Epoch 74/100 \t Train Err: 4.1875 0.005279541015625 0.049072265625 5.90625 7.34375\n",
+ "Epoch 74/100 \t Train Err: 4.2188 0.00531005859375 0.04931640625 6.09375 7.375\n",
+ "Epoch 74/100 \t Train Err: 4.0938 0.00531005859375 0.05029296875 6.21875 6.96875\n",
+ "Epoch 74/100 \t Train Err: 4.1562 0.0052490234375 0.053955078125 6.3125 7.09375\n",
+ "Epoch 74/100 \t Train Err: 4.1250 0.00518798828125 0.05126953125 6.3125 7.0625\n",
+ "Epoch 74/100 \t Train Err: 4.1562 0.0052490234375 0.051513671875 6.21875 7.0625\n",
+ "Epoch 74/100 \t Train Err: 4.0938 0.005218505859375 0.049560546875 6.1875 7.0\n",
+ "Epoch 75/100 \t Train Err: 4.1250 0.005218505859375 0.0517578125 6.3125 7.0625\n",
+ "Epoch 75/100 \t Train Err: 4.0312 0.005218505859375 0.050048828125 6.03125 6.875\n",
+ "Epoch 75/100 \t Train Err: 4.0938 0.005157470703125 0.0498046875 5.84375 7.15625\n",
+ "Epoch 75/100 \t Train Err: 4.1250 0.005126953125 0.04541015625 5.875 7.21875\n",
+ "Epoch 75/100 \t Train Err: 4.0625 0.005126953125 0.05126953125 6.09375 6.9375\n",
+ "Epoch 75/100 \t Train Err: 4.0000 0.005218505859375 0.050537109375 6.40625 6.53125\n",
+ "Epoch 75/100 \t Train Err: 4.1250 0.00518798828125 0.049560546875 6.09375 6.9375\n",
+ "Epoch 75/100 \t Train Err: 4.0625 0.005126953125 0.046875 5.59375 7.1875\n",
+ "Epoch 76/100 \t Train Err: 4.0000 0.00506591796875 0.045654296875 5.5 7.15625\n",
+ "Epoch 76/100 \t Train Err: 4.1562 0.005157470703125 0.0458984375 6.0 7.25\n",
+ "Epoch 76/100 \t Train Err: 4.0625 0.00518798828125 0.048095703125 6.5 6.59375\n",
+ "Epoch 76/100 \t Train Err: 4.0625 0.005096435546875 0.0458984375 6.0 6.875\n",
+ "Epoch 76/100 \t Train Err: 4.0312 0.0052490234375 0.044921875 5.75 7.03125\n",
+ "Epoch 76/100 \t Train Err: 4.1562 0.00518798828125 0.043701171875 5.75 7.28125\n",
+ "Epoch 76/100 \t Train Err: 4.0625 0.005096435546875 0.0498046875 5.84375 7.09375\n",
+ "Epoch 76/100 \t Train Err: 4.0625 0.005126953125 0.044189453125 6.25 6.8125\n",
+ "Epoch 77/100 \t Train Err: 3.9688 0.005126953125 0.0478515625 6.15625 6.59375\n",
+ "Epoch 77/100 \t Train Err: 4.0312 0.005126953125 0.046142578125 5.84375 6.90625\n",
+ "Epoch 77/100 \t Train Err: 4.0000 0.00506591796875 0.04541015625 5.71875 6.96875\n",
+ "Epoch 77/100 \t Train Err: 3.9531 0.004974365234375 0.046142578125 5.46875 7.0\n",
+ "Epoch 77/100 \t Train Err: 4.0312 0.004974365234375 0.044677734375 6.0625 6.8125\n",
+ "Epoch 77/100 \t Train Err: 4.0312 0.005035400390625 0.047607421875 6.125 6.6875\n",
+ "Epoch 77/100 \t Train Err: 4.0312 0.004852294921875 0.047607421875 6.09375 6.84375\n",
+ "Epoch 77/100 \t Train Err: 3.9844 0.0048828125 0.043701171875 5.53125 6.96875\n",
+ "Epoch 78/100 \t Train Err: 3.9844 0.004852294921875 0.045654296875 5.875 6.875\n",
+ "Epoch 78/100 \t Train Err: 3.9844 0.004913330078125 0.0458984375 5.8125 6.78125\n",
+ "Epoch 78/100 \t Train Err: 3.9375 0.0048828125 0.046142578125 5.6875 6.65625\n",
+ "Epoch 78/100 \t Train Err: 3.9531 0.004974365234375 0.04541015625 5.71875 6.78125\n",
+ "Epoch 78/100 \t Train Err: 4.0000 0.004913330078125 0.0458984375 5.5625 7.0\n",
+ "Epoch 78/100 \t Train Err: 3.9375 0.0048828125 0.041748046875 5.46875 6.90625\n",
+ "Epoch 78/100 \t Train Err: 3.9688 0.00494384765625 0.043701171875 5.71875 6.875\n",
+ "Epoch 78/100 \t Train Err: 3.9375 0.005035400390625 0.048095703125 5.96875 6.46875\n",
+ "Epoch 79/100 \t Train Err: 3.9844 0.0050048828125 0.049072265625 6.03125 6.625\n",
+ "Epoch 79/100 \t Train Err: 3.9688 0.0050048828125 0.040283203125 4.90625 7.25\n",
+ "Epoch 79/100 \t Train Err: 4.0312 0.004974365234375 0.04345703125 5.4375 7.1875\n",
+ "Epoch 79/100 \t Train Err: 3.9688 0.005035400390625 0.045166015625 6.03125 6.5625\n",
+ "Epoch 79/100 \t Train Err: 4.0000 0.005035400390625 0.0478515625 6.46875 6.28125\n",
+ "Epoch 79/100 \t Train Err: 3.9219 0.005035400390625 0.043701171875 5.03125 7.03125\n",
+ "Epoch 79/100 \t Train Err: 4.0312 0.004974365234375 0.044921875 4.90625 7.375\n",
+ "Epoch 79/100 \t Train Err: 3.9062 0.004974365234375 0.04296875 5.8125 6.625\n",
+ "Epoch 80/100 \t Train Err: 3.9219 0.0050048828125 0.04638671875 6.40625 6.25\n",
+ "Epoch 80/100 \t Train Err: 3.8750 0.004974365234375 0.045654296875 5.65625 6.5\n",
+ "Epoch 80/100 \t Train Err: 3.9531 0.0048828125 0.04345703125 5.15625 7.15625\n",
+ "Epoch 80/100 \t Train Err: 3.9688 0.004913330078125 0.043701171875 5.25 7.0\n",
+ "Epoch 80/100 \t Train Err: 3.8906 0.004974365234375 0.04345703125 5.875 6.53125\n",
+ "Epoch 80/100 \t Train Err: 3.8281 0.00494384765625 0.044677734375 5.65625 6.4375\n",
+ "Epoch 80/100 \t Train Err: 3.9531 0.004852294921875 0.041259765625 5.4375 6.96875\n",
+ "Epoch 80/100 \t Train Err: 3.9531 0.00494384765625 0.04345703125 5.15625 7.0\n",
+ "Epoch 81/100 \t Train Err: 3.9844 0.0048828125 0.0419921875 5.5625 6.8125\n",
+ "Epoch 81/100 \t Train Err: 3.9219 0.0048828125 0.04443359375 5.71875 6.46875\n",
+ "Epoch 81/100 \t Train Err: 3.9375 0.00482177734375 0.046630859375 5.71875 6.625\n",
+ "Epoch 81/100 \t Train Err: 3.9219 0.00469970703125 0.042236328125 5.03125 6.90625\n",
+ "Epoch 81/100 \t Train Err: 3.9219 0.0047607421875 0.042236328125 5.0625 7.03125\n",
+ "Epoch 81/100 \t Train Err: 3.9844 0.004791259765625 0.044189453125 5.5625 6.90625\n",
+ "Epoch 81/100 \t Train Err: 3.8750 0.0048828125 0.044677734375 5.875 6.375\n",
+ "Epoch 81/100 \t Train Err: 3.8438 0.00482177734375 0.045166015625 5.40625 6.5625\n",
+ "Epoch 82/100 \t Train Err: 3.8281 0.0048828125 0.044189453125 5.0625 6.8125\n",
+ "Epoch 82/100 \t Train Err: 3.8906 0.004913330078125 0.042724609375 5.15625 7.03125\n",
+ "Epoch 82/100 \t Train Err: 3.8750 0.00482177734375 0.048583984375 5.90625 6.34375\n",
+ "Epoch 82/100 \t Train Err: 3.8594 0.004913330078125 0.045166015625 5.6875 6.28125\n",
+ "Epoch 82/100 \t Train Err: 3.8594 0.004852294921875 0.043701171875 5.28125 6.78125\n",
+ "Epoch 82/100 \t Train Err: 3.8594 0.004852294921875 0.042236328125 4.84375 6.9375\n",
+ "Epoch 82/100 \t Train Err: 3.7969 0.0048828125 0.0400390625 5.15625 6.625\n",
+ "Epoch 82/100 \t Train Err: 3.8281 0.004730224609375 0.04736328125 5.53125 6.28125\n",
+ "Epoch 83/100 \t Train Err: 3.8438 0.0047607421875 0.044921875 5.3125 6.53125\n",
+ "Epoch 83/100 \t Train Err: 3.7969 0.00469970703125 0.043212890625 5.09375 6.625\n",
+ "Epoch 83/100 \t Train Err: 3.8125 0.004669189453125 0.043701171875 5.1875 6.71875\n",
+ "Epoch 83/100 \t Train Err: 3.8281 0.004669189453125 0.042236328125 5.09375 6.6875\n",
+ "Epoch 83/100 \t Train Err: 3.8125 0.0047607421875 0.042236328125 5.40625 6.46875\n",
+ "Epoch 83/100 \t Train Err: 3.8594 0.00482177734375 0.04150390625 5.34375 6.59375\n",
+ "Epoch 83/100 \t Train Err: 3.7500 0.00482177734375 0.041748046875 5.3125 6.375\n",
+ "Epoch 83/100 \t Train Err: 3.7812 0.004791259765625 0.040771484375 4.75 6.75\n",
+ "Epoch 84/100 \t Train Err: 3.7188 0.0047607421875 0.0390625 5.21875 6.34375\n",
+ "Epoch 84/100 \t Train Err: 3.7656 0.0047607421875 0.040771484375 5.09375 6.40625\n",
+ "Epoch 84/100 \t Train Err: 3.7969 0.004852294921875 0.04248046875 5.25 6.34375\n",
+ "Epoch 84/100 \t Train Err: 3.7344 0.0047607421875 0.03955078125 5.125 6.3125\n",
+ "Epoch 84/100 \t Train Err: 3.7500 0.004791259765625 0.03857421875 4.5 6.90625\n",
+ "Epoch 84/100 \t Train Err: 3.7656 0.00482177734375 0.04248046875 5.1875 6.40625\n",
+ "Epoch 84/100 \t Train Err: 3.7656 0.00482177734375 0.042724609375 5.3125 6.21875\n",
+ "Epoch 84/100 \t Train Err: 3.7188 0.004730224609375 0.04345703125 5.21875 6.28125\n",
+ "Epoch 85/100 \t Train Err: 3.7500 0.004608154296875 0.039794921875 4.65625 6.625\n",
+ "Epoch 85/100 \t Train Err: 3.7344 0.004608154296875 0.04150390625 4.71875 6.53125\n",
+ "Epoch 85/100 \t Train Err: 3.7344 0.004730224609375 0.042236328125 5.375 6.3125\n",
+ "Epoch 85/100 \t Train Err: 3.7656 0.004730224609375 0.040283203125 5.4375 6.28125\n",
+ "Epoch 85/100 \t Train Err: 3.6875 0.00469970703125 0.0400390625 5.0 6.28125\n",
+ "Epoch 85/100 \t Train Err: 3.7031 0.00469970703125 0.037109375 4.8125 6.53125\n",
+ "Epoch 85/100 \t Train Err: 3.7500 0.00469970703125 0.038330078125 4.78125 6.625\n",
+ "Epoch 85/100 \t Train Err: 3.6875 0.004669189453125 0.0400390625 5.34375 6.0625\n",
+ "Epoch 86/100 \t Train Err: 3.7344 0.004638671875 0.037841796875 5.40625 6.25\n",
+ "Epoch 86/100 \t Train Err: 3.7031 0.004638671875 0.0380859375 4.875 6.46875\n",
+ "Epoch 86/100 \t Train Err: 3.7344 0.00457763671875 0.0390625 4.59375 6.6875\n",
+ "Epoch 86/100 \t Train Err: 3.7031 0.004638671875 0.036865234375 5.09375 6.21875\n",
+ "Epoch 86/100 \t Train Err: 3.7031 0.004638671875 0.0390625 5.28125 6.15625\n",
+ "Epoch 86/100 \t Train Err: 3.7500 0.004669189453125 0.037841796875 4.875 6.59375\n",
+ "Epoch 86/100 \t Train Err: 3.7188 0.004638671875 0.0390625 4.625 6.5625\n",
+ "Epoch 86/100 \t Train Err: 3.6406 0.004730224609375 0.039306640625 5.125 5.9375\n",
+ "Epoch 87/100 \t Train Err: 3.6875 0.00469970703125 0.03759765625 4.9375 6.28125\n",
+ "Epoch 87/100 \t Train Err: 3.6875 0.004730224609375 0.039794921875 4.78125 6.25\n",
+ "Epoch 87/100 \t Train Err: 3.7031 0.004791259765625 0.0361328125 4.65625 6.53125\n",
+ "Epoch 87/100 \t Train Err: 3.6719 0.0047607421875 0.03662109375 4.625 6.5\n",
+ "Epoch 87/100 \t Train Err: 3.6094 0.004791259765625 0.037109375 5.125 6.0\n",
+ "Epoch 87/100 \t Train Err: 3.7344 0.00482177734375 0.036865234375 5.375 6.15625\n",
+ "Epoch 87/100 \t Train Err: 3.6719 0.0047607421875 0.037353515625 4.65625 6.53125\n",
+ "Epoch 87/100 \t Train Err: 3.6562 0.004791259765625 0.03662109375 4.59375 6.4375\n",
+ "Epoch 88/100 \t Train Err: 3.6562 0.004638671875 0.03857421875 5.28125 6.0\n",
+ "Epoch 88/100 \t Train Err: 3.5938 0.004730224609375 0.040283203125 4.875 6.03125\n",
+ "Epoch 88/100 \t Train Err: 3.6875 0.004608154296875 0.03955078125 4.5625 6.5\n",
+ "Epoch 88/100 \t Train Err: 3.6875 0.004730224609375 0.0380859375 4.71875 6.46875\n",
+ "Epoch 88/100 \t Train Err: 3.5469 0.004608154296875 0.037353515625 4.53125 6.21875\n",
+ "Epoch 88/100 \t Train Err: 3.6250 0.004608154296875 0.040771484375 4.9375 6.125\n",
+ "Epoch 88/100 \t Train Err: 3.6094 0.004486083984375 0.038330078125 4.8125 6.21875\n",
+ "Epoch 88/100 \t Train Err: 3.5938 0.0045166015625 0.0400390625 4.75 6.21875\n",
+ "Epoch 89/100 \t Train Err: 3.6406 0.0045166015625 0.038330078125 4.875 6.21875\n",
+ "Epoch 89/100 \t Train Err: 3.5469 0.00457763671875 0.041748046875 5.21875 5.6875\n",
+ "Epoch 89/100 \t Train Err: 3.6250 0.004608154296875 0.03759765625 4.46875 6.40625\n",
+ "Epoch 89/100 \t Train Err: 3.5469 0.004638671875 0.035400390625 4.625 6.15625\n",
+ "Epoch 89/100 \t Train Err: 3.6094 0.00469970703125 0.0341796875 4.40625 6.375\n",
+ "Epoch 89/100 \t Train Err: 3.5781 0.00469970703125 0.0361328125 4.75 6.0625\n",
+ "Epoch 89/100 \t Train Err: 3.4688 0.0047607421875 0.0341796875 4.53125 5.96875\n",
+ "Epoch 89/100 \t Train Err: 3.5781 0.00469970703125 0.033935546875 4.40625 6.34375\n",
+ "Epoch 90/100 \t Train Err: 3.5625 0.0047607421875 0.03369140625 4.53125 6.25\n",
+ "Epoch 90/100 \t Train Err: 3.6094 0.004791259765625 0.033447265625 4.875 6.0625\n",
+ "Epoch 90/100 \t Train Err: 3.6250 0.0048828125 0.034423828125 4.40625 6.34375\n",
+ "Epoch 90/100 \t Train Err: 3.5469 0.004913330078125 0.03662109375 4.71875 6.0\n",
+ "Epoch 90/100 \t Train Err: 3.5000 0.004913330078125 0.035400390625 4.40625 6.125\n",
+ "Epoch 90/100 \t Train Err: 3.5469 0.00494384765625 0.03369140625 4.125 6.375\n",
+ "Epoch 90/100 \t Train Err: 3.5156 0.004852294921875 0.035400390625 4.53125 5.96875\n",
+ "Epoch 90/100 \t Train Err: 3.5156 0.00469970703125 0.036376953125 4.71875 5.90625\n",
+ "Epoch 91/100 \t Train Err: 3.5312 0.0047607421875 0.033447265625 4.375 6.28125\n",
+ "Epoch 91/100 \t Train Err: 3.5469 0.004730224609375 0.032958984375 4.34375 6.3125\n",
+ "Epoch 91/100 \t Train Err: 3.6094 0.004669189453125 0.0390625 5.34375 5.65625\n",
+ "Epoch 91/100 \t Train Err: 3.5156 0.004608154296875 0.032958984375 4.28125 6.0625\n",
+ "Epoch 91/100 \t Train Err: 3.5469 0.00457763671875 0.03125 4.03125 6.4375\n",
+ "Epoch 91/100 \t Train Err: 3.5312 0.004608154296875 0.03564453125 4.84375 5.71875\n",
+ "Epoch 91/100 \t Train Err: 3.4844 0.004547119140625 0.0322265625 4.5 5.90625\n",
+ "Epoch 91/100 \t Train Err: 3.5312 0.004608154296875 0.0299072265625 3.953125 6.625\n",
+ "Epoch 92/100 \t Train Err: 3.5000 0.004547119140625 0.03515625 5.21875 5.46875\n",
+ "Epoch 92/100 \t Train Err: 3.4688 0.004486083984375 0.034423828125 4.53125 5.9375\n",
+ "Epoch 92/100 \t Train Err: 3.4531 0.00457763671875 0.032470703125 3.875 6.375\n",
+ "Epoch 92/100 \t Train Err: 3.5938 0.004669189453125 0.038330078125 4.9375 5.84375\n",
+ "Epoch 92/100 \t Train Err: 3.5156 0.004730224609375 0.03369140625 5.03125 5.6875\n",
+ "Epoch 92/100 \t Train Err: 3.5625 0.004791259765625 0.029052734375 3.890625 6.65625\n",
+ "Epoch 92/100 \t Train Err: 3.4531 0.004730224609375 0.032470703125 4.5 5.90625\n",
+ "Epoch 92/100 \t Train Err: 3.4531 0.0047607421875 0.030517578125 4.71875 5.6875\n",
+ "Epoch 93/100 \t Train Err: 3.5156 0.00482177734375 0.0281982421875 3.78125 6.53125\n",
+ "Epoch 93/100 \t Train Err: 3.4531 0.00482177734375 0.03173828125 4.6875 5.5625\n",
+ "Epoch 93/100 \t Train Err: 3.4531 0.004791259765625 0.03271484375 4.6875 5.6875\n",
+ "Epoch 93/100 \t Train Err: 3.4375 0.00469970703125 0.0279541015625 3.96875 6.25\n",
+ "Epoch 93/100 \t Train Err: 3.3594 0.004730224609375 0.03076171875 4.125 5.9375\n",
+ "Epoch 93/100 \t Train Err: 3.4688 0.004730224609375 0.0301513671875 4.96875 5.625\n",
+ "Epoch 93/100 \t Train Err: 3.3906 0.004730224609375 0.0296630859375 4.03125 6.0625\n",
+ "Epoch 93/100 \t Train Err: 3.4688 0.0047607421875 0.0294189453125 4.25 6.0625\n",
+ "Epoch 94/100 \t Train Err: 3.3906 0.0047607421875 0.031005859375 4.375 5.6875\n",
+ "Epoch 94/100 \t Train Err: 3.4219 0.004791259765625 0.031494140625 4.53125 5.8125\n",
+ "Epoch 94/100 \t Train Err: 3.4375 0.004791259765625 0.0281982421875 4.25 6.03125\n",
+ "Epoch 94/100 \t Train Err: 3.4219 0.004791259765625 0.02978515625 4.34375 5.8125\n",
+ "Epoch 94/100 \t Train Err: 3.4219 0.00469970703125 0.0308837890625 4.375 5.78125\n",
+ "Epoch 94/100 \t Train Err: 3.4375 0.004669189453125 0.028564453125 4.25 6.0\n",
+ "Epoch 94/100 \t Train Err: 3.3594 0.00469970703125 0.0283203125 4.25 5.875\n",
+ "Epoch 94/100 \t Train Err: 3.3594 0.004852294921875 0.03125 4.28125 5.6875\n",
+ "Epoch 95/100 \t Train Err: 3.3750 0.004791259765625 0.0284423828125 4.25 5.6875\n",
+ "Epoch 95/100 \t Train Err: 3.3750 0.0047607421875 0.0279541015625 3.90625 5.96875\n",
+ "Epoch 95/100 \t Train Err: 3.3594 0.004730224609375 0.029052734375 4.28125 5.71875\n",
+ "Epoch 95/100 \t Train Err: 3.3906 0.004791259765625 0.0296630859375 4.3125 5.78125\n",
+ "Epoch 95/100 \t Train Err: 3.3750 0.004791259765625 0.03076171875 4.125 5.78125\n",
+ "Epoch 95/100 \t Train Err: 3.3594 0.0047607421875 0.0291748046875 4.25 5.8125\n",
+ "Epoch 95/100 \t Train Err: 3.3438 0.004669189453125 0.02783203125 4.15625 5.875\n",
+ "Epoch 95/100 \t Train Err: 3.3281 0.0047607421875 0.0299072265625 4.375 5.5\n",
+ "Epoch 96/100 \t Train Err: 3.3438 0.0048828125 0.02880859375 3.84375 5.90625\n",
+ "Epoch 96/100 \t Train Err: 3.3750 0.0047607421875 0.030029296875 4.0625 5.8125\n",
+ "Epoch 96/100 \t Train Err: 3.3438 0.00482177734375 0.0308837890625 3.9375 5.71875\n",
+ "Epoch 96/100 \t Train Err: 3.3125 0.004730224609375 0.028564453125 4.21875 5.59375\n",
+ "Epoch 96/100 \t Train Err: 3.2969 0.004638671875 0.0291748046875 3.765625 5.90625\n",
+ "Epoch 96/100 \t Train Err: 3.3750 0.004638671875 0.034912109375 4.375 5.75\n",
+ "Epoch 96/100 \t Train Err: 3.2656 0.004638671875 0.029296875 4.125 5.53125\n",
+ "Epoch 96/100 \t Train Err: 3.2500 0.004638671875 0.0286865234375 3.984375 5.625\n",
+ "Epoch 97/100 \t Train Err: 3.2656 0.00457763671875 0.028564453125 4.125 5.59375\n",
+ "Epoch 97/100 \t Train Err: 3.3438 0.004547119140625 0.02587890625 3.859375 5.9375\n",
+ "Epoch 97/100 \t Train Err: 3.3125 0.00457763671875 0.02783203125 4.03125 5.6875\n",
+ "Epoch 97/100 \t Train Err: 3.3438 0.004608154296875 0.02783203125 4.125 5.6875\n",
+ "Epoch 97/100 \t Train Err: 3.3125 0.004730224609375 0.0279541015625 3.875 5.75\n",
+ "Epoch 97/100 \t Train Err: 3.2500 0.004730224609375 0.0289306640625 3.78125 5.6875\n",
+ "Epoch 97/100 \t Train Err: 3.2812 0.004669189453125 0.030517578125 4.03125 5.5625\n",
+ "Epoch 97/100 \t Train Err: 3.3281 0.004638671875 0.029296875 4.1875 5.625\n",
+ "Epoch 98/100 \t Train Err: 3.2969 0.004669189453125 0.0264892578125 3.734375 6.0\n",
+ "Epoch 98/100 \t Train Err: 3.2031 0.0047607421875 0.0262451171875 3.921875 5.5\n",
+ "Epoch 98/100 \t Train Err: 3.2969 0.004791259765625 0.02685546875 4.59375 5.1875\n",
+ "Epoch 98/100 \t Train Err: 3.3750 0.0047607421875 0.02392578125 3.34375 6.5\n",
+ "Epoch 98/100 \t Train Err: 3.4688 0.004791259765625 0.033447265625 5.625 4.625\n",
+ "Epoch 98/100 \t Train Err: 4.5625 0.00482177734375 0.01953125 1.4296875 11.0625\n",
+ "Epoch 98/100 \t Train Err: 10.6875 0.00567626953125 0.44140625 46.5 1.109375\n",
+ "Epoch 98/100 \t Train Err: 11.5625 0.0096435546875 0.0252685546875 0.322265625 29.875\n",
+ "Epoch 99/100 \t Train Err: 12.5000 0.1318359375 0.0341796875 0.1640625 32.25\n",
+ "Epoch 99/100 \t Train Err: 7.3125 0.71484375 0.66796875 2.953125 17.125\n",
+ "Epoch 99/100 \t Train Err: 9.1250 1.265625 3.046875 38.0 3.265625\n",
+ "Epoch 99/100 \t Train Err: 9.4375 1.078125 3.578125 40.5 2.859375\n",
+ "Epoch 99/100 \t Train Err: 5.9688 0.419921875 1.484375 14.3125 8.125\n",
+ "Epoch 99/100 \t Train Err: 6.6250 0.01513671875 0.3125 4.5 15.25\n",
+ "Epoch 99/100 \t Train Err: 7.5000 0.326171875 0.042236328125 2.78125 18.0\n",
+ "Epoch 99/100 \t Train Err: 6.6250 0.765625 0.087890625 5.0 14.375\n"
]
}
],
@@ -889,9 +1363,14 @@
" \n",
" # test_err.append(test_loss)\n",
" train_err.append(train_loss)\n",
+ " len1.append(criterion(output[batch_labels == 1].squeeze(1), batch_labels[batch_labels==1]))\n",
+ " len2.append(criterion(output[batch_labels == 2].squeeze(1), batch_labels[batch_labels==2]))\n",
+ " len3.append(criterion(output[batch_labels == 3].squeeze(1), batch_labels[batch_labels==3]))\n",
+ " len15.append(criterion(output[batch_labels == 15].squeeze(1), batch_labels[batch_labels==15]))\n",
+ " \n",
" with open('loss', 'a') as f:\n",
" f.write(f\"{train_loss}\\n\")\n",
- " print(f\"Epoch {epoch}/{NEPOCHS} \\t Train Err: {train_loss:.4f}\")\n",
+ " print(f\"Epoch {epoch}/{NEPOCHS} \\t Train Err: {train_loss:.4f} {len1[-1]} {len2[-1]} {len3[-1]} {len15[-1]}\")\n",
"\n",
" epoch += 1\n",
" if epoch % 100 == 0:\n",
@@ -900,7 +1379,8 @@
},
{
"cell_type": "code",
- "execution_count": 125,
+ "execution_count": 16,
+ "execution_state": "idle",
"metadata": {},
"outputs": [],
"source": [
@@ -929,21 +1409,10 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "<Figure size 640x480 with 1 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"plt.suptitle('MSE vs Epochs')\n",
"plt.plot(train_err, label='Train', color='blue')\n",
@@ -954,7 +1423,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 41,
"execution_state": "idle",
"metadata": {
"id": "LoGEmM5lH7_A"
@@ -962,46 +1431,9 @@
"outputs": [
{
"data": {
+ "image/png": "",
"text/plain": [
- "(array([[3.1870e+04, 4.5000e+01, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " ...,\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 1.3300e+02, 2.9200e+02,\n",
- " 4.8201e+04]]),\n",
- " array([ 1. , 1.28 , 1.561, 1.84 , 2.121, 2.4 , 2.68 , 2.96 ,\n",
- " 3.24 , 3.52 , 3.8 , 4.08 , 4.36 , 4.64 , 4.92 , 5.2 ,\n",
- " 5.48 , 5.76 , 6.04 , 6.32 , 6.6 , 6.88 , 7.16 , 7.44 ,\n",
- " 7.72 , 8. , 8.28 , 8.56 , 8.84 , 9.12 , 9.4 , 9.68 ,\n",
- " 9.96 , 10.24 , 10.52 , 10.805, 11.08 , 11.36 , 11.64 , 11.92 ,\n",
- " 12.2 , 12.484, 12.76 , 13.04 , 13.32 , 13.6 , 13.88 , 14.164,\n",
- " 14.44 , 14.72 , 15. ], dtype=float16),\n",
- " array([ 0.824, 1.1 , 1.376, 1.652, 1.928, 2.203, 2.48 , 2.756,\n",
- " 3.031, 3.307, 3.582, 3.86 , 4.133, 4.41 , 4.688, 4.96 ,\n",
- " 5.24 , 5.516, 5.79 , 6.066, 6.34 , 6.617, 6.895, 7.168,\n",
- " 7.445, 7.723, 7.996, 8.27 , 8.55 , 8.83 , 9.09 , 9.375,\n",
- " 9.66 , 9.92 , 10.2 , 10.484, 10.75 , 11.03 , 11.31 , 11.58 ,\n",
- " 11.86 , 12.14 , 12.41 , 12.69 , 12.97 , 13.234, 13.516, 13.8 ,\n",
- " 14.06 , 14.34 , 14.625], dtype=float16),\n",
- " <matplotlib.collections.QuadMesh at 0x7fe60c0a49b0>)"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "<Figure size 640x480 with 1 Axes>"
+ "<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
@@ -1013,29 +1445,22 @@
"model.eval()\n",
"with torch.no_grad():\n",
" output = model(batch_src, batch_padding_mask)\n",
- "batch_src[0], batch_labels[0], output[0]\n",
- "x = batch_labels.detach().to(torch.float16).cpu().numpy().flatten()\n",
- "y = output.detach().to(torch.float16).cpu().numpy().flatten()\n",
- "plt.hist2d(x, y, bins=50, norm=mpl.colors.LogNorm())"
+ "x = batch_labels.detach().to(torch.uint8)  # integer labels, as needed by bincount\n",
+ "y = output.detach()\n",
+ "cnts = torch.bincount(x)  # cnts[k] = number of samples in the batch whose label is k\n",
+ "weights = (1.0 / cnts[x.long()].double()).cpu().numpy()  # normalize by label count; one vectorized gather (int64 index) instead of a per-sample .item() loop\n",
+ "fig, ax = plt.subplots()\n",
+ "h = ax.hist2d(x.cpu().numpy().flatten(), y.to(torch.float16).cpu().numpy().flatten(), weights=weights, bins=[15,50], norm=mpl.colors.LogNorm())\n",
+ "fig.colorbar(h[3], ax=ax)  # h[3] is the QuadMesh returned by hist2d\n",
+ "plt.show()"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.353515625"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"evaluate()"
]
@@ -1051,7 +1476,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
"outputs": [],
@@ -1071,7 +1496,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
"outputs": [],
@@ -1088,117 +1513,10 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/100 \t Train Err: 2.8906\n",
- "Epoch 2/100 \t Train Err: 0.3340\n",
- "Epoch 3/100 \t Train Err: 0.1709\n",
- "Epoch 4/100 \t Train Err: 0.2373\n",
- "Epoch 5/100 \t Train Err: 0.2520\n",
- "Epoch 6/100 \t Train Err: 0.1953\n",
- "Epoch 7/100 \t Train Err: 0.1963\n",
- "Epoch 8/100 \t Train Err: 0.2236\n",
- "Epoch 9/100 \t Train Err: 0.2119\n",
- "Epoch 10/100 \t Train Err: 0.1777\n",
- "Epoch 11/100 \t Train Err: 0.1660\n",
- "Epoch 12/100 \t Train Err: 0.1787\n",
- "Epoch 13/100 \t Train Err: 0.1816\n",
- "Epoch 14/100 \t Train Err: 0.1562\n",
- "Epoch 15/100 \t Train Err: 0.1377\n",
- "Epoch 16/100 \t Train Err: 0.1377\n",
- "Epoch 17/100 \t Train Err: 0.1387\n",
- "Epoch 18/100 \t Train Err: 0.1289\n",
- "Epoch 19/100 \t Train Err: 0.1162\n",
- "Epoch 20/100 \t Train Err: 0.1079\n",
- "Epoch 21/100 \t Train Err: 0.1108\n",
- "Epoch 22/100 \t Train Err: 0.1099\n",
- "Epoch 23/100 \t Train Err: 0.1021\n",
- "Epoch 24/100 \t Train Err: 0.0918\n",
- "Epoch 25/100 \t Train Err: 0.0913\n",
- "Epoch 26/100 \t Train Err: 0.0913\n",
- "Epoch 27/100 \t Train Err: 0.0859\n",
- "Epoch 28/100 \t Train Err: 0.0820\n",
- "Epoch 29/100 \t Train Err: 0.0767\n",
- "Epoch 30/100 \t Train Err: 0.0776\n",
- "Epoch 31/100 \t Train Err: 0.0747\n",
- "Epoch 32/100 \t Train Err: 0.0713\n",
- "Epoch 33/100 \t Train Err: 0.0698\n",
- "Epoch 34/100 \t Train Err: 0.0679\n",
- "Epoch 35/100 \t Train Err: 0.0664\n",
- "Epoch 36/100 \t Train Err: 0.0669\n",
- "Epoch 37/100 \t Train Err: 0.0645\n",
- "Epoch 38/100 \t Train Err: 0.0601\n",
- "Epoch 39/100 \t Train Err: 0.0583\n",
- "Epoch 40/100 \t Train Err: 0.0569\n",
- "Epoch 41/100 \t Train Err: 0.0564\n",
- "Epoch 42/100 \t Train Err: 0.0554\n",
- "Epoch 43/100 \t Train Err: 0.0532\n",
- "Epoch 44/100 \t Train Err: 0.0520\n",
- "Epoch 45/100 \t Train Err: 0.0500\n",
- "Epoch 46/100 \t Train Err: 0.0483\n",
- "Epoch 47/100 \t Train Err: 0.0457\n",
- "Epoch 48/100 \t Train Err: 0.0452\n",
- "Epoch 49/100 \t Train Err: 0.0444\n",
- "Epoch 50/100 \t Train Err: 0.0430\n",
- "Epoch 51/100 \t Train Err: 0.0422\n",
- "Epoch 52/100 \t Train Err: 0.0405\n",
- "Epoch 53/100 \t Train Err: 0.0408\n",
- "Epoch 54/100 \t Train Err: 0.0378\n",
- "Epoch 55/100 \t Train Err: 0.0378\n",
- "Epoch 56/100 \t Train Err: 0.0369\n",
- "Epoch 57/100 \t Train Err: 0.0354\n",
- "Epoch 58/100 \t Train Err: 0.0344\n",
- "Epoch 59/100 \t Train Err: 0.0337\n",
- "Epoch 60/100 \t Train Err: 0.0334\n",
- "Epoch 61/100 \t Train Err: 0.0322\n",
- "Epoch 62/100 \t Train Err: 0.0312\n",
- "Epoch 63/100 \t Train Err: 0.0304\n",
- "Epoch 64/100 \t Train Err: 0.0310\n",
- "Epoch 65/100 \t Train Err: 0.0304\n",
- "Epoch 66/100 \t Train Err: 0.0297\n",
- "Epoch 67/100 \t Train Err: 0.0283\n",
- "Epoch 68/100 \t Train Err: 0.0281\n",
- "Epoch 69/100 \t Train Err: 0.0280\n",
- "Epoch 70/100 \t Train Err: 0.0273\n",
- "Epoch 71/100 \t Train Err: 0.0267\n",
- "Epoch 72/100 \t Train Err: 0.0277\n",
- "Epoch 73/100 \t Train Err: 0.0269\n",
- "Epoch 74/100 \t Train Err: 0.0258\n",
- "Epoch 75/100 \t Train Err: 0.0249\n",
- "Epoch 76/100 \t Train Err: 0.0254\n",
- "Epoch 77/100 \t Train Err: 0.0245\n",
- "Epoch 78/100 \t Train Err: 0.0244\n",
- "Epoch 79/100 \t Train Err: 0.0242\n",
- "Epoch 80/100 \t Train Err: 0.0237\n",
- "Epoch 81/100 \t Train Err: 0.0243\n",
- "Epoch 82/100 \t Train Err: 0.0225\n",
- "Epoch 83/100 \t Train Err: 0.0225\n",
- "Epoch 84/100 \t Train Err: 0.0221\n",
- "Epoch 85/100 \t Train Err: 0.0227\n",
- "Epoch 86/100 \t Train Err: 0.0222\n",
- "Epoch 87/100 \t Train Err: 0.0219\n",
- "Epoch 88/100 \t Train Err: 0.0220\n",
- "Epoch 89/100 \t Train Err: 0.0210\n",
- "Epoch 90/100 \t Train Err: 0.0210\n",
- "Epoch 91/100 \t Train Err: 0.0211\n",
- "Epoch 92/100 \t Train Err: 0.0208\n",
- "Epoch 93/100 \t Train Err: 0.0205\n",
- "Epoch 94/100 \t Train Err: 0.0200\n",
- "Epoch 95/100 \t Train Err: 0.0208\n",
- "Epoch 96/100 \t Train Err: 0.0198\n",
- "Epoch 97/100 \t Train Err: 0.0195\n",
- "Epoch 98/100 \t Train Err: 0.0197\n",
- "Epoch 99/100 \t Train Err: 0.0190\n",
- "Epoch 100/100 \t Train Err: 0.0192\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"for epoch in range(N_TUNE_EPOCHS):\n",
" model.train()\n",
@@ -1222,21 +1540,10 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "<Figure size 640x480 with 1 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"plt.suptitle('MSE vs Epochs')\n",
"plt.plot(tune_train_err, label='Train', color='blue')\n",
@@ -1247,78 +1554,20 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.0189208984375"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"tune_evaluate()"
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"execution_state": "idle",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(array([[2.6100e+02, 8.9530e+03, 8.2329e+04, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " ...,\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,\n",
- " 0.0000e+00],\n",
- " [0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 1.0000e+00,\n",
- " 0.0000e+00]]),\n",
- " array([1. , 1.1 , 1.2 , 1.3 , 1.4 , 1.5 , 1.6 , 1.699, 1.8 ,\n",
- " 1.9 , 2. , 2.1 , 2.2 , 2.3 , 2.398, 2.5 , 2.6 , 2.7 ,\n",
- " 2.8 , 2.898, 3. , 3.1 , 3.2 , 3.299, 3.398, 3.5 , 3.6 ,\n",
- " 3.7 , 3.799, 3.898, 4. , 4.1 , 4.2 , 4.297, 4.4 , 4.5 ,\n",
- " 4.6 , 4.7 , 4.797, 4.9 , 5. , 5.098, 5.2 , 5.3 , 5.4 ,\n",
- " 5.5 , 5.598, 5.7 , 5.797, 5.9 , 6. ], dtype=float16),\n",
- " array([0.8477, 0.913 , 0.9785, 1.044 , 1.109 , 1.176 , 1.241 , 1.307 ,\n",
- " 1.372 , 1.4375, 1.503 , 1.568 , 1.635 , 1.699 , 1.766 , 1.831 ,\n",
- " 1.896 , 1.962 , 2.027 , 2.094 , 2.158 , 2.225 , 2.29 , 2.355 ,\n",
- " 2.422 , 2.486 , 2.55 , 2.617 , 2.684 , 2.75 , 2.814 , 2.879 ,\n",
- " 2.945 , 3.012 , 3.076 , 3.143 , 3.207 , 3.273 , 3.338 , 3.404 ,\n",
- " 3.469 , 3.535 , 3.602 , 3.666 , 3.732 , 3.797 , 3.863 , 3.928 ,\n",
- " 3.994 , 4.062 , 4.125 ], dtype=float16),\n",
- " <matplotlib.collections.QuadMesh at 0x7fe6040e22a0>)"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "<Figure size 640x480 with 1 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"batch_src, batch_labels, batch_padding_mask = mktunebatch(BSZ)\n",
"model.eval()\n",
@@ -1340,51 +1589,30 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
+ "execution_state": "idle",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "batch_src, batch_labels, batch_padding_mask = mktunebatch(BSZ, test=True)\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ " output = model(batch_src, batch_padding_mask)\n",
+ "print(criterion(output.squeeze(1), batch_labels).item())  # loss of the model on this batch\n",
+ "x = batch_labels.detach().to(torch.float16).cpu().numpy().flatten()\n",
+ "y = output.detach().to(torch.float16).cpu().numpy().flatten()  # the final ';' below keeps the (counts, edges, QuadMesh) tuple out of the cell output\n",
+ "plt.hist2d(x, y, bins=50, norm=mpl.colors.LogNorm());"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
"execution_state": "idle",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0.1767578125\n"
- ]
- },
- {
"data": {
- "text/plain": [
- "(array([[ 241., 824., 9690., ..., 0., 0., 0.],\n",
- " [ 0., 0., 0., ..., 0., 0., 0.],\n",
- " [ 0., 0., 0., ..., 0., 0., 0.],\n",
- " ...,\n",
- " [ 0., 0., 0., ..., 0., 0., 0.],\n",
- " [ 0., 0., 0., ..., 0., 0., 0.],\n",
- " [ 0., 0., 0., ..., 0., 0., 0.]]),\n",
- " array([ 1. , 1.18 , 1.36 , 1.54 , 1.721, 1.9 , 2.08 , 2.262,\n",
- " 2.441, 2.621, 2.8 , 2.98 , 3.16 , 3.34 , 3.521, 3.701,\n",
- " 3.88 , 4.062, 4.242, 4.42 , 4.6 , 4.78 , 4.96 , 5.14 ,\n",
- " 5.32 , 5.5 , 5.68 , 5.863, 6.043, 6.223, 6.402, 6.582,\n",
- " 6.76 , 6.94 , 7.12 , 7.3 , 7.48 , 7.66 , 7.844, 8.02 ,\n",
- " 8.2 , 8.38 , 8.56 , 8.74 , 8.92 , 9.1 , 9.28 , 9.46 ,\n",
- " 9.64 , 9.82 , 10. ], dtype=float16),\n",
- " array([0.7344, 0.818 , 0.9014, 0.9844, 1.068 , 1.151 , 1.234 , 1.318 ,\n",
- " 1.402 , 1.485 , 1.568 , 1.652 , 1.735 , 1.819 , 1.902 , 1.986 ,\n",
- " 2.07 , 2.152 , 2.236 , 2.32 , 2.402 , 2.486 , 2.57 , 2.652 ,\n",
- " 2.736 , 2.82 , 2.904 , 2.986 , 3.07 , 3.154 , 3.238 , 3.32 ,\n",
- " 3.404 , 3.488 , 3.57 , 3.654 , 3.738 , 3.822 , 3.904 , 3.988 ,\n",
- " 4.07 , 4.156 , 4.24 , 4.32 , 4.406 , 4.49 , 4.57 , 4.656 ,\n",
- " 4.74 , 4.824 , 4.906 ], dtype=float16),\n",
- " <matplotlib.collections.QuadMesh at 0x7fe607ee0110>)"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
+ "image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
@@ -1394,14 +1622,13 @@
}
],
"source": [
- "batch_src, batch_labels, batch_padding_mask = mktunebatch(BSZ, test=True)\n",
- "model.eval()\n",
- "with torch.no_grad():\n",
- " output = model(batch_src, batch_padding_mask)\n",
- "print(criterion(output.squeeze(1), batch_labels).item())\n",
- "x = batch_labels.detach().to(torch.float16).cpu().numpy().flatten()\n",
- "y = output.detach().to(torch.float16).cpu().numpy().flatten()\n",
- "plt.hist2d(x, y, bins=50, norm=mpl.colors.LogNorm())"
+ "with open('training-loss') as f:  # NOTE(review): the training loop appends to 'loss'; confirm 'training-loss' is the intended file\n",
+ " train_err = [float(tok) for tok in f.read().split()]  # one value per whitespace-separated token; rebinds train_err\n",
+ "plt.suptitle('Log MSE vs Epochs')\n",
+ "plt.plot(torch.log(torch.tensor(train_err)[:500]), label='Train', color='blue')  # only the first 500 logged values are shown\n",
+ "plt.xlabel('Epochs')\n",
+ "plt.ylabel('Log MSE')\n",
+ "plt.show()"
]
}
],