Diffstat (limited to 'insane-shortest-paths.ipynb')

 -rw-r--r--  insane-shortest-paths.ipynb | 451
 1 file changed, 427 insertions, 24 deletions
diff --git a/insane-shortest-paths.ipynb b/insane-shortest-paths.ipynb
index e74974b..a3e58fb 100644
--- a/insane-shortest-paths.ipynb
+++ b/insane-shortest-paths.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 1,
    "execution_state": "idle",
    "id": "86ce5f44-94f6-43b0-a0d1-091b8134ffb6",
    "metadata": {},
@@ -22,8 +22,8 @@
    "from collections import deque\n",
    "\n",
    "# Set manual seeds for reproducibility\n",
-   "torch.manual_seed(33)\n",
-   "random.seed(33)\n",
+   "# torch.manual_seed(33)\n",
+   "# random.seed(33)\n",
    "\n",
    "# Configuration\n",
    "NVTXS = 16\n",
@@ -122,7 +122,6 @@
    "        Q = nn.Parameter(torch.zeros((2, HIDDENDIM), device=device))\n",
    "        Q[0, START_REACH - 1 + head] = SUPABIG\n",
    "        Q[1, NOTANS_FLAG_IDX] = 1\n",
-   "btrfs filesystem resize max\n",
    "        K = nn.Parameter(torch.zeros((2, HIDDENDIM), device=device))\n",
    "        K[0, head] = 1\n",
    "        K[1, ANS_FLAG_IDX] = BIG\n",
@@ -134,7 +133,6 @@
    "        self.mostKs.append(K)\n",
    "        self.mostQs.append(Q)\n",
    "        self.mostVs.append(V)\n",
-   "\n",
    "        self.weirdKs = nn.ParameterList()\n",
    "        self.weirdQs = nn.ParameterList()\n",
    "        self.weirdVs = nn.ParameterList()\n",
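The second hunk above comments out the manual seeding, so each run now draws a fresh random graph and weight initialization. To get reproducible runs back, both RNGs have to be re-seeded before any data or model construction; a minimal sketch, using the same seed as the commented-out code:

import random
import torch

def set_seed(seed: int = 33) -> None:
    # Seed both RNGs the notebook relies on: Python's `random`
    # (graph sampling) and torch's global generator (parameter init,
    # CUDA/ROCm generators included).
    random.seed(seed)
    torch.manual_seed(seed)

set_seed(33)  # call once, before building the dataset and model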
@@ -204,35 +202,440 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "execution_state": "idle",
    "id": "a9dd76f4-96f2-47b5-9bb9-a32a1b478dd4",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Epoch [0/10000], Loss: 8.3387\n",
-      "Epoch [10/10000], Loss: 7.6416\n",
-      "Epoch [20/10000], Loss: 11.2689\n",
-      "Epoch [30/10000], Loss: 7.0312\n",
-      "Epoch [40/10000], Loss: 8.7287\n",
-      "Epoch [50/10000], Loss: 7.7182\n"
+      "/tmp/ipykernel_15454/381745885.py:148: UserWarning: Attempting to use hipBLASLt on an unsupported architecture! Overriding blas backend to hipblas (Triggered internally at ../aten/src/ATen/Context.cpp:296.)\n",
+      "  ksrc = torch.matmul(src, K.unsqueeze(0).transpose(-2, -1))\n"
      ]
     },
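The stderr block is new: on this ROCm GPU, PyTorch warns once that hipBLASLt is unsupported and falls back to hipblas, pointing at the matmul on line 148 of the cell. The fallback is functional, so the warning is safe to silence; a sketch using only the standard warnings module:

import warnings

# Hide the one-time hipBLASLt fallback warning; PyTorch still
# computes the matmul, just via the hipblas backend.
warnings.filterwarnings(
    "ignore",
    message=r".*hipBLASLt on an unsupported architecture.*",
)

Recent ROCm builds also read the TORCH_BLAS_PREFER_HIPBLASLT=0 environment variable to skip hipBLASLt up front; treat that switch as build-dependent.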
     {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "---------------------------------------------------------------------------",
-      "KeyboardInterrupt                         Traceback (most recent call last)",
-      "Cell In[6], line 11\n      9 loss = loss_fn(outputs, labels)\n     10 optimizer.zero_grad()\n---> 11 loss.backward()\n     12 optimizer.step()\n     13 train_err.append(loss.item())\n",
-      "File ~/.venv/lib64/python3.12/site-packages/torch/_tensor.py:581, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)\n    571 if has_torch_function_unary(self):\n    572     return handle_torch_function(\n    573         Tensor.backward,\n    574         (self,),\n   (...)\n    579         inputs=inputs,\n    580     )\n--> 581 torch.autograd.backward(\n    582     self, gradient, retain_graph, create_graph, inputs=inputs\n    583 )\n",
-      "File ~/.venv/lib64/python3.12/site-packages/torch/autograd/__init__.py:347, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\n    342     retain_graph = create_graph\n    344 # The reason we repeat the same comment below is that\n    345 # some Python versions print out the first line of a multi-line function\n    346 # calls in the traceback and some print out the last line\n--> 347 _engine_run_backward(\n    348     tensors,\n    349     grad_tensors_,\n    350     retain_graph,\n    351     create_graph,\n    352     inputs,\n    353     allow_unreachable=True,\n    354     accumulate_grad=True,\n    355 )\n",
-      "File ~/.venv/lib64/python3.12/site-packages/torch/autograd/graph.py:825, in _engine_run_backward(t_outputs, *args, **kwargs)\n    823     unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)\n    824 try:\n--> 825     return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n    826         t_outputs, *args, **kwargs\n    827     )\n    828 finally:\n    829     if attach_logging_hooks:\n",
-      "KeyboardInterrupt: "
-     ]
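The deleted error output records where the old run was killed: a KeyboardInterrupt inside loss.backward(). If stopping a run by hand is routine, the loop can catch the interrupt and keep the partial loss history instead of leaving a traceback in the notebook. A sketch built around the loop visible in the traceback, where model and get_batch are hypothetical stand-ins for the notebook's own data and model plumbing:

train_err = []
try:
    for epoch in range(10000):
        data, labels = get_batch()   # hypothetical batch source
        outputs = model(data)        # hypothetical model handle
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_err.append(loss.item())
        if epoch % 10 == 0:
            print(f"Epoch [{epoch}/10000], Loss: {loss.item():.4f}")
except KeyboardInterrupt:
    # A kernel interrupt lands here instead of raising a traceback,
    # so the partial loss history survives.
    print(f"stopped at epoch {epoch}; kept {len(train_err)} loss values")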
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [0/10000], Loss: 0.0025\n",
+      "Epoch [10/10000], Loss: 6.4609\n",
+      "Epoch [20/10000], Loss: 11.0729\n",
+      "Epoch [30/10000], Loss: 10.3862\n",
+      "Epoch [40/10000], Loss: 8.3659\n",
+      "Epoch [50/10000], Loss: 8.4364\n",
+      "Epoch [60/10000], Loss: 7.0110\n",
+      "Epoch [70/10000], Loss: 6.2279\n",
+      "Epoch [80/10000], Loss: 14.5876\n",
+      "Epoch [90/10000], Loss: 13.5753\n",
       [... ~400 similar "Epoch [N/10000], Loss: ..." lines omitted ...]
+      "Epoch [4100/10000], Loss: 0.6206\n",
+      "Epoch [4110/10000], Loss: 0.9877\n",
+      "Epoch [4120/10000], Loss: 1.2839\n",
+      "Epoch [4130/10000], Loss: 0.4944\n",
+      "Epoch [4140/10000], Loss: 0.6533\n",
+      "Epoch [4150/10000], Loss: 0.4354\n",
+      "Epoch [4160/10000], Loss: 0.4216\n"
+     ]
     }
    ],
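The replacement stdout log runs to epoch 4160: loss falls from the 6–15 range early on to under 1 by roughly epoch 2350, with recurring spikes (around epochs 1970, 2670, and 3590) before settling near 0.4. A sketch for recovering the curve from the captured text, where log_text is assumed to hold the cell's stdout as one string:

import re
import matplotlib.pyplot as plt

pattern = re.compile(r"Epoch \[(\d+)/10000\], Loss: ([\d.]+)")
pairs = [(int(e), float(l)) for e, l in pattern.findall(log_text)]
epochs, losses = zip(*pairs)

plt.plot(epochs, losses)
plt.yscale("log")   # keeps both the spikes and the final plateau visible
plt.xlabel("epoch")
plt.ylabel("training loss")
plt.show()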