diff options
author | SIPB | 2024-12-10 22:19:47 -0500 |
---|---|---|
committer | SIPB | 2024-12-10 22:19:47 -0500 |
commit | 037acd689ee7f1f392d8209f1c3c5cabde90be5f (patch) | |
tree | a8c33dcf7f9ded7518372e611ca1e6278c88319c | |
parent | 392a46d0ecea7f5eef7e76f217007ccb04be593c (diff) |
Final commit
28 files changed, 7176 insertions, 1023 deletions
diff --git a/configs/bert_11M.json b/configs/bert_11M.json new file mode 100644 index 0000000..695789c --- /dev/null +++ b/configs/bert_11M.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/configs/bert_19M.json b/configs/bert_19M.json new file mode 100644 index 0000000..891e78c --- /dev/null +++ b/configs/bert_19M.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 6, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/configs/bert_35M.json b/configs/bert_35M.json new file mode 100644 index 0000000..b697e04 --- /dev/null +++ b/configs/bert_35M.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 512, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 8, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/configs/bert_50M.json b/configs/bert_50M.json new file mode 100644 index 0000000..9e2facb --- /dev/null +++ b/configs/bert_50M.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2560, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 8, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/configs/bert_67M.json b/configs/bert_67M.json new file mode 100644 index 0000000..bac1d03 --- /dev/null +++ b/configs/bert_67M.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/configs/bert_6M.json b/configs/bert_6M.json new file mode 100644 index 0000000..b093bc5 --- /dev/null +++ b/configs/bert_6M.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 160, + "initializer_range": 0.02, + "intermediate_size": 640, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/configs/test.json b/configs/test.json new file mode 100644 index 0000000..b093bc5 --- /dev/null +++ b/configs/test.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "bert-base-uncased", + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 160, + "initializer_range": 0.02, + "intermediate_size": 640, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 128, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.46.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +}
\ No newline at end of file diff --git a/data/ltr_riddles.txt b/data/ltr_riddles.txt new file mode 100644 index 0000000..1ea51e1 --- /dev/null +++ b/data/ltr_riddles.txt @@ -0,0 +1,40 @@ +I am footsteps. The more you take, the more you leave behind. +I am darkness. The more you have of me, the less you see. +I am fire. I am not alive, but I grow. I don’t have lungs, but I need air. I don’t have a mouth, but water kills me. +I am a towel. I get wetter the more I dry. +I am a candle. I become shorter the longer I live. +I am breath. I am light as a feather, yet the strongest man can’t hold me for much longer than a minute. +I am the wind. I am invisible, but you can feel me. I am intangible, but you can hear me. +I am a piano. I have keys but open no locks. +I am fog. The more of me there is, the less you see. +I am the future. I am always in front of you, but you can never see me. +I am a cloud. I don’t have wings, but I can fly. I don’t have eyes, but I can cry. Wherever I go, darkness follows me. +I am a battery. I am not alive, but I can die. +I am a heart. I can be stolen, but I can’t be touched. +I am the letter "M." I am something that comes once in a minute, twice in a moment, but never in a thousand years. +I am a promise. I can be cracked, but never broken. +I am your shadow. I am always with you, but I can’t be seen. +I am a footprint. I’m small but I can cover miles. +I am a secret. The more you have of me, the less you know. +I am light. I’m something that can fill a room, but I don’t take up space. +I am your name. I am always with you, but you never see me. I can be forgotten, but I never leave. +I am silence. The more of me there is, the less you hear. +I am a stamp. I can travel around the world while staying in the corner. +I am a code. I am something that can be cracked, but I can’t be touched. +I am sound. I am something you can hear, but not touch. I can be loud or soft, but I can never be seen. +I am the present moment. 
I am something that you can never keep, no matter how hard you try. +I am a rumor. I am not alive, but I grow. I don’t have a mouth, but I can speak. +I am a clock. I am always running, but I never move. +I am a hole. I get bigger the more you take away. +I am understanding. I can’t be seen, but I can be felt. I have no color, but I make things clear. +I am a pencil. I get smaller the more you use me. +I am a promise. I can be broken without being touched. +I am time. I am something that everyone has, but no one can keep forever. +I am a thought. I can be light as a feather, but even the strongest hands cannot hold me. +I am a debt. The more you take from me, the greater I become. +I am the horizon. I am often in front of you, but I’m never within reach. +I am the sky. You can see me every day, but I will never be seen the same way twice. +I am a reputation. I am not alive, but I grow over time. +I am a feeling. I can’t be touched, but I can touch everything. +I am a look. I never speak, but I can communicate. +I am a deadline. I can be hard, but I am not solid.
\ No newline at end of file diff --git a/data/make-histogram-thing.ipynb b/data/make-histogram-thing.ipynb new file mode 100644 index 0000000..72e7dbb --- /dev/null +++ b/data/make-histogram-thing.ipynb @@ -0,0 +1,546 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "execution_state": "idle", + "id": "7a21c467-a114-447d-bdb8-91778b59a3ad", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "csv_filename = 'wandb_export_2024-12-04T19_56_43.325-05_00.csv'\n", + "df = pd.read_csv(csv_filename)\n", + "# https://huggingface.co/datasets/ntotsuka123/ja-pretrain/viewer/default/train?p=1&row=120" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "execution_state": "idle", + "id": "0732274a-bc56-44a3-912f-e023c344bc56", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.drop([0, 1, 10, 11, 12, 15,16,17,18,19])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "execution_state": "idle", + "id": "23adfc0e-12af-4280-a31e-1601b1bfc3cf", + "metadata": {}, + "outputs": [], + "source": [ + "def extract_size(name):\n", + " if 'distilbert_base' in name:\n", + " return '67M'\n", + " elif 'bert_6M' in name or 'bert_6_' in name:\n", + " return '6M'\n", + " elif 'bert_11' in name:\n", + " return '11M'\n", + " elif 'bert_19' in name:\n", + " return '19M'\n", + " elif 'bert_35' in name:\n", + " return '35M'\n", + " elif 'bert_base' in name:\n", + " return '110M' # Regular BERT base models have ~110M parameters\n", + " else:\n", + " return 'other'" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "execution_state": "idle", + "id": "895cde04-f6f8-4f47-8f48-16008dd68a55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " 
vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>val_loss</th>\n", + " <th>size</th>\n", + " <th>Type</th>\n", + " <th>val_loss_exp</th>\n", + " <th>params</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>bert_6M_rtl_scratch</td>\n", + " <td>4.744476</td>\n", + " <td>6M</td>\n", + " <td>RTL</td>\n", + " <td>114.947528</td>\n", + " <td>6</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>bert_6_ltr_scratch</td>\n", + " <td>4.761365</td>\n", + " <td>6M</td>\n", + " <td>LTR</td>\n", + " <td>116.905354</td>\n", + " <td>6</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>bert_11_rtl_scratch</td>\n", + " <td>4.446950</td>\n", + " <td>11M</td>\n", + " <td>RTL</td>\n", + " <td>85.366156</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>bert_11_ltr_scratch</td>\n", + " <td>4.462379</td>\n", + " <td>11M</td>\n", + " <td>LTR</td>\n", + " <td>86.693476</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>bert_19_rtl_scratch</td>\n", + " <td>4.177320</td>\n", + " <td>19M</td>\n", + " <td>RTL</td>\n", + " <td>65.190932</td>\n", + " <td>19</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>bert_19_ltr_scratch</td>\n", + " <td>4.186271</td>\n", + " <td>19M</td>\n", + " <td>LTR</td>\n", + " <td>65.777026</td>\n", + " <td>19</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>bert_35_rtl_scratch</td>\n", + " <td>3.927857</td>\n", + " <td>35M</td>\n", + " <td>RTL</td>\n", + " <td>50.797983</td>\n", + " <td>35</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>bert_35_ltr_scratch</td>\n", + " <td>3.941595</td>\n", + " <td>35M</td>\n", + " <td>LTR</td>\n", + " <td>51.500691</td>\n", + " 
<td>35</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>distilbert_base_ltr_scratch</td>\n", + " <td>3.686307</td>\n", + " <td>67M</td>\n", + " <td>LTR</td>\n", + " <td>39.897253</td>\n", + " <td>67</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>distilbert_base_rtl_scratch</td>\n", + " <td>3.688566</td>\n", + " <td>67M</td>\n", + " <td>RTL</td>\n", + " <td>39.987461</td>\n", + " <td>67</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name val_loss size Type val_loss_exp params\n", + "2 bert_6M_rtl_scratch 4.744476 6M RTL 114.947528 6\n", + "3 bert_6_ltr_scratch 4.761365 6M LTR 116.905354 6\n", + "4 bert_11_rtl_scratch 4.446950 11M RTL 85.366156 11\n", + "5 bert_11_ltr_scratch 4.462379 11M LTR 86.693476 11\n", + "6 bert_19_rtl_scratch 4.177320 19M RTL 65.190932 19\n", + "7 bert_19_ltr_scratch 4.186271 19M LTR 65.777026 19\n", + "8 bert_35_rtl_scratch 3.927857 35M RTL 50.797983 35\n", + "9 bert_35_ltr_scratch 3.941595 35M LTR 51.500691 35\n", + "13 distilbert_base_ltr_scratch 3.686307 67M LTR 39.897253 67\n", + "14 distilbert_base_rtl_scratch 3.688566 67M RTL 39.987461 67" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['size'] = df['Name'].apply(extract_size)\n", + "df['Type'] = df['Name'].apply(lambda x: 'LTR' if 'ltr' in x else 'RTL')\n", + "df['val_loss_exp'] = np.exp(df['val_loss'])\n", + "df['params'] = df['size'].str.slice(stop=-1).apply(lambda s: int(s))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "execution_state": "idle", + "id": "5d5922ca-79bf-4761-954a-8755d70ad626", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_480557/1275629619.py:9: FutureWarning: \n", + "\n", + "The `ci` parameter is deprecated. 
Use `errorbar=None` for the same effect.\n", + "\n", + " sns.barplot(x='size', y='val_loss_exp', hue='Type', data=df_sorted_pairs, dodge=True, palette=\"Set2\", ci=None)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1200x800 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Sort by size and then by LTR/RTL to group them together\n", + "df_sorted_pairs = df.sort_values(by=['params', 'Type'])\n", + "\n", + "# Plot configuration\n", + "fig, axes = plt.subplots(figsize=(12, 8))\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "# Create bar plot with LTR and RTL next to each other, no error bars (ci=None)\n", + "sns.barplot(x='size', y='val_loss_exp', hue='Type', data=df_sorted_pairs, dodge=True, palette=\"Set2\", ci=None)\n", + "\n", + "# Adjustments to the plot\n", + "# plt.xticks(rotation=45)\n", + "plt.title(\"Perplexity vs Model Size, From Scratch\", fontsize=20)\n", + "plt.xlabel(\"Model Size\", fontsize=20)\n", + "plt.ylabel(\"Test Perplexity\", fontsize=20)\n", + "# plt.legend(title=\"Model Type\",fontsize=20)\n", + "plt.legend(title=\"\",fontsize=20)\n", + "plt.tick_params(axis='both', labelsize=20)\n", + "\n", + "# Display the updated plot\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "execution_state": "idle", + "id": "bb25f31d-91b1-4bd5-be03-36a63f1e857e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.0, 122.75062123923252)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "axes.get_ylim()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "execution_state": "idle", + "id": "371bccdf-d3c1-4699-9c22-1e2c0b8cfd42", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + 
"source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Data\n", + "names = ['LTR', 'RTL']\n", + "val_loss = [np.exp(2.8237654270093375), np.exp(2.8326140656842465)]\n", + "\n", + "# Create bar plot\n", + "plt.bar(names, val_loss, color=['#72B6A1', '#E99675'])\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('Model', fontsize=20)\n", + "plt.ylabel('Validation Perplexity', fontsize=20)\n", + "plt.title('DistilBERT Base Japan Perplexity', fontsize=20)\n", + "\n", + "# Show the plot\n", + "# plt.xticks(rotation=45, ha=\"right\") # Rotate x labels for better readability\n", + "plt.tick_params(axis='both', labelsize=20)\n", + "plt.tight_layout() # Adjust layout to fit everything\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "execution_state": "idle", + "id": "88c66310-bf62-44fc-b09d-5fb08ec084ad", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 104, + "execution_state": "idle", + "id": "5c86ffd5-d280-4b9f-b250-97d3e398f9a7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2794676/549159718.py:15: FutureWarning: \n", + "\n", + "The `ci` parameter is deprecated. 
Use `errorbar=None` for the same effect.\n", + "\n", + " sns.barplot(x='model', y='ppl', hue='direction', data=riddles_rtl_df_sorted_pairs, dodge=True, palette=\"Set2\", ci=None)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 600x800 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "riddles_rtl_df = pd.DataFrame({\n", + " 'model': ['DistilBERT Base', 'DistilBERT Base', 'BERT Base', 'BERT Base'],\n", + " 'order': [0, 0, 1, 1],\n", + " 'direction': [\"LTR\", \"RTL\", \"LTR\", \"RTL\"],\n", + " 'ppl': [290, 160, 1010, 520],\n", + "})\n", + "\n", + "riddles_rtl_df_sorted_pairs = riddles_rtl_df.sort_values(by=['order', 'direction'])\n", + "\n", + "# Plot configuration\n", + "plt.figure(figsize=(6, 8))\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "# Create bar plot with LTR and RTL next to each other, no error bars (ci=None)\n", + "sns.barplot(x='model', y='ppl', hue='direction', data=riddles_rtl_df_sorted_pairs, dodge=True, palette=\"Set2\", ci=None)\n", + "\n", + "# Adjustments to the plot\n", + "# plt.xticks(rotation=45)\n", + "plt.title(\"QA Riddle Perplexities\", fontsize=20)\n", + "plt.xlabel(\"Model\", fontsize=20)\n", + "plt.ylabel(\"Validation Perplexity\", fontsize=20)\n", + "plt.legend(title=\"\", fontsize=20)\n", + "plt.tick_params(axis='both', labelsize=20)\n", + "\n", + "# Display the updated plot\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "execution_state": "idle", + "id": "26eba67f-ee2e-44ad-b18f-392aad75aedb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_480557/2407310871.py:15: FutureWarning: \n", + "\n", + "The `ci` parameter is deprecated. 
Use `errorbar=None` for the same effect.\n", + "\n", + " sns.barplot(x='model', y='ppl', hue='direction', data=riddles_ltr_df_sorted_pairs, dodge=True, palette=\"Set2\", ci=None)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAMWCAYAAAAtQ/h0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACZmklEQVR4nOzdeVgT1+I//nfYFARBVFABFdxRKO5bKxX3WhdUXHBrtVXr0qu2emut2qq1trbXe79aW61al1IVVFRcC6iorYqKKIgriAKyyCqrQJLfHx8yvwRCCGYQgu/X8/g8Q2bmzBnMhHfOOXNGIpfL5SAiIiIi0RhUdwWIiIiIahsGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsqlabNm1Cu3bt0K5du+quSrk8PDzQrl07fPHFF9VdFapBDh8+LLx34+Pjq+QYU6dORbt27TB16tQqKV9b+nCdEtU0RtVdAfo/V69exbRp09Suq1u3LqytrdGhQwcMGzYMw4YNg5ER/+tI/8THx2PAgAHCzxKJBMHBwbCzs6tw3yFDhiA2Nlb4+bvvvsOYMWOqrK61iVwux9mzZ3HixAlERkbi+fPnePnyJczMzNCkSRM4OTnB1dUVb7/9Ntq3b1/d1a0Wpd+bFZk/fz4WLFhQpXXSB4cPH8ayZcuEnx0cHBAUFFThfomJifDw8IBMJhNeCw4Ohr29fbnlv8o17+HhgYSEBOHn77//HqNHj65wv6+++gp+fn7Cz56enli/fn2ljs0WLD1QUFCAZ8+eITg4GJ9//jkmTpyI58+fV3e1qORDWfHN/vDhw9VdHb0jl8sREBBQ4Xbh4eEq4Yq0l5qaismTJ2Pu3Lk4ceIEnjx5gry8PEilUmRnZ+Phw4c4c+YMNmzYgFGjRiE6Orq6q0x6LC4uDmFhYRVuFxAQoBKuXpdjx45VuM3Lly9x+vRpnY/FZpAaaNKkSfD29hZ+zsvLQ2RkJHbu3ImEhARERERg7ty58PX1hUQiqda6vgnOnj1b3VWolerUqYOXL1/i6NGjmDNnjsZtjx49qrIPaaewsBAffvghHjx4AABwdnbGmDFj0KFDB9SrVw85OTmIjo7GtWvXEBISguzsbLXlLFiw4I1qrRkwYAAWLlyocZuGDRu+tvroC+VrukuXLhq3fd3XtOI4ly9fRnJyMmxtbcvd9uzZs8jOzta5bmzBqoEaNmyItm3bCv/c3NwwZcoU+Pv7o0WLFgCA27dv49y5c9VdVaJX5uHhAQCIiYlBREREudsVFRXh5MmTQMkfPtKen5+fEK7GjBmDQ4cOYerUqejWrRs6dOiA7t27Y+LEifjpp5/wzz//4LvvvkP9+vWru9rVrn79+iqfwer+MWCVpbimT58+jcLCwnK3u3PnDh49egS8xmu6U6dOaNy4MWQyGY4fP65xW0X407VuDFh6xNLSErNmzRJ+vnjxYrXWh0gXrVq1QqdOnQClDzR1QkJCkJmZCWNjY7z33nuvsYb6Lzg4GABgZGSEZcuWwcCg/I98ExMTjBkzBo0bN36NNaTa5L333oOxsTEyMzMREhJS7naK693FxQVOTk6vpW6GhoYYMWKEyvHVSU9Px6VLlwBAq7FamrCLUM+4uroKy8+ePSuzXiqV4tixYzh9+jTu3LmDzMxM1KtXD05OThg8eDAmTZqEunXrqi176tSpCA0NRY8ePbB3717ExsZiz549uHTpEpKTk1FQUCAMQ
lQelL9nzx50794dBw8exOHDhxETE4PCwkI0b94cw4cPxwcffIA6derodN4vX76En58fAgMD8ejRI2RlZcHCwgLt2rXD8OHD4enpWWbg//Xr1zF16lTIZDK4u7tj27ZtasvOycnByJEjkZCQAGtrawQEBKBRo0bCesUgydKDHEvfUbVs2TKVwZ5QGgjr6emJqKgoODk54dSpUxrPNSMjA++88w6KioowadIkfP3111r9jpYtW4bDhw+jTp06+Oeff2Bubq5xe8WgcRcXFxw8eFBlXWRkJP7880/cuHEDycnJkEqlsLa2RsOGDfHWW2/h7bffhoeHh85d1KNGjUJkZCROnjyJL774Qu3NG4oPw/79+2vduiKTyRAQEIDjx48jKioKWVlZMDc3R5s2bTB06FB4eXnBxMREYxlZWVn47bffEBQUhGfPnqFevXpo164dJkyYgGHDhml9jq/y3hWL4jOiQYMGOrVMbdq0CZs3bwYA3L9/X2Wd4nNDW+oGMgPA8+fP8ccff+DixYuIj49HXl4eGjZsCDc3N0yYMAF9+vR55fpXJeVB2MHBwbCxscGff/6JkydP4smTJ8jMzCwzIL6wsBB+fn44ffo0Hj58iJycHFhaWsLZ2Rnvv/8+RowYUW4Y/uKLL+Dv7w87OzucPXsWz58/x86dO3H27FkkJSWhfv366NKlC+bPn482bdoI+8XHx2PXrl24ePEiEhMTYW5ujt69e+Nf//oXmjdvLsrvwtLSEu+++y4CAwNx9OhRDBo0qMw2xcXFOHHiBFBy/WdmZopybG2MGjUKO3fuxP3793Hv3j21N3ScOHECRUVFaNiwIfr27avT8Riw9IzyB7FUKlVZ9+zZM3zyySe4d++eyuuZmZkICwtDWFgY9u3bh61bt8LR0VHjcYKCgrBkyRLk5eVVWKeioiLMmjWrTIva/fv3cf/+fRw7dgy7du165W/G9+7dw9y5c1XuBEHJN43Lly/j8uXLOHDgAH799VeVYNStWzfMmjULv/76K0JCQuDj44PJkyeXKf+bb74Ryv72229VyhDLuHHjsHr1asTExCA8PBxubm7lbhsQEICioiIAwNixY7U+xsiRI3H48GG8fPkSgYGB8PT0LHfbiIgIYdC44ludwq5du/D999+XGYCalJSEpKQk3LlzB3/++SfCwsJQr149reunzvDhw/H9998jLS0Nly5dwrvvvquyPisrC+fPnwdKPhy1kZmZiU8++aTMQNuMjAyEhoYiNDQUPj4++O2338q9ezE6OhoffPABUlJShNcU4zcuX76MCxcuoHv37hXW5VXfu2IxNjYGSga6Z2ZmwsrKSvRjiOHYsWNYtWpVmc+bpKQknD59GqdPn8a4cePwzTff1Og7qDMyMjB//nzcvXu33G3i4+Px8ccfIyYmRuX11NRUXLhwARcuXMCBAwewZcuWCv+/7t27h48++kjlpqeCggKcPn0aFy5cwG+//YZu3brh8uXLWLBggcoYu5cvX+L48eO4ePEifHx8VMKYLkaNGoXAwECcP39e7Xvu77//RmpqKoyMjDB8+HD4+PiIclxttG/fHu3atcP9+/dx9OhRtQFL8YVu+PDhOr/Xau47ldRSjKcAABsbG2E5IyMD3t7eSExMhImJCcaPH4/u3bvDzs4OeXl5+Pvvv7Fnzx48efIEH3/8Mfz9/WFhYaH2GM+ePcOSJUtQt25dfPLJJ+jWrRsMDQ0REREBMzOzMtv/97//RUREBN5++21MmjQJTZo0QVJSEv7880/8/fffePToEebMmQNfX18YGhpW6nyfPHmCKVOmIDs7G+bm5pg8eTJcXV3RpEkTZGZm4uzZszhw4IAw8N/Hx0f4o4KSFqRLly4hMjISP/zwA3r16oVWrVoJ60+cOCHcVTJhwgRhDIE2AgICkJKSgpkzZwIAFi5cWKbPXjFOY+TIkfjhhx9QUFCAw4cPawxYirsR27VrBxcXF63r07NnT9jY2CAlJQUBAQEaA
5ZiDIKhoSGGDx8uvH7v3j0hXNnb22PKlClo3749rKyskJubi8ePH+PKlSuiDfxv2LAh3n77bZw/fx5Hjx4tE7BOnTqFwsJCWFlZoV+/frh586bG8qRSKebMmSNs16NHD0yePBn29vZISUnBoUOHEBQUJASoI0eOlAmJOTk5mDlzphCu3nvvPYwePRoNGzZEbGwsfv/9dxw+fBgPHz7UWBdd37ti6NixIx48eAC5XI4VK1Zg/fr1Oofi0tatW4f8/Pxy16enp2PevHnIycmBhYVFmc+dkydPYunSpZDL5XBwcMCUKVPQqlUrWFtbIyEhAQcPHkRISAgOHjwIc3PzMq3ENcny5cvx4MEDjB49Gu+99x4aNWqExMRE4f81NzcXH3zwAeLi4gAAAwcOxNixY2FjY4P4+Hj4+PggNDQUN27cwJw5c+Dj41PuZ2Z+fj7mzZuHoqIiLF68GN27d4ehoSEuXryIX3/9FXl5eVi6dCl+//13zJs3DxYWFvj000/x1ltvobi4GH/99Rd2796NrKwsLF++HL6+vqL8Dtzd3WFlZYXMzEycOnUKkyZNUlmvCDDvvPMOrK2tRTlmZYwaNQo//PADjh8/jiVLlqi0FCqPB9X2C50mDFh6pLi4GL///rvwc48ePYTltWvXIjExEXZ2dti9ezccHBxU9u3ZsyeGDh2KyZMnIy4uDtu3b8eiRYvUHic+Ph42NjY4cOAAmjVrJrz+1ltvqd0+IiICEyZMwOrVq4XXOnXqhIEDB2L58uU4ePAgIiMjsX//frUtSJr8+9//RnZ2NpydnbFjx44yF+Tbb7+Nd999F7Nnz8atW7fg7++P8ePHC+uNjY3x448/wtPTE/n5+fj8889x4MABmJiYIDExUeh+a9myZaU/uNu2basSOG1tbdG2bVu121pYWGDIkCE4evQoTp48iS+//FJtV21UVJTw7bey870YGBhg+PDh+P3333HlyhWkpqaqbRWRyWTCoPHevXurbHPmzBnIZDKYmZnhwIEDZfbv1q0bvLy8kJ2dDVNT00rVrzyjRo3C+fPncfbsWeTk5Kh0bSo+jN97770Ku/QAYP/+/UK4Gj16NNavX6/Sjenh4YGNGzfi119/xdOnT7FlyxYsWbJEpYyff/4ZiYmJAIDFixdj9uzZwrpOnTphyJAhmDNnjjBOozy6vnfF4O3tjaNHj0Imk+Gvv/7ClStX0L9/f3Tr1g2urq5o06ZNpb/0lFb6s0ZZYWEhVq1ahZycHBgaGuI///kPLC0thfXp6elYuXIl5HI5xo4di9WrV6u0GnTs2BGDBw8W/s/27NmDCRMmVPm4nRcvXqh8mS3N0tJS7V1o9+/fx9q1a+Hl5SW81rFjR2F58+bNQrj65JNPVO5UVLy3lixZgoCAANy8eRMHDhxQuaNcWXp6OuRyOfz8/FS6+N566y00aNAAq1evRkJCAiZOnIjGjRtj3759Ku/Brl27wtDQEDt27MCtW7cQFRUFZ2fnSv2e1DExMcGwYcOwb98+HD16VCVg5eTkCOMCxQgwr2LEiBH46aefkJKSgsuXL6t0Ayo+b1q3bi2MD9UFB7nrgby8PISGhuLDDz9EeHg4AMDOzk4Y8BsfHy+M61mxYkW5H3jOzs7CxVrRnE2fffaZSrjSpFGjRuWGky+//FK4qPft26dVeQrXr18X/liuX7++3G87/fr1w5AhQ4ByzsvR0VGoX1RUFP73v/9BJpNh6dKlePHihRDCxAoM5VF86GZnZ+Ovv/5Su42i/sbGxhg5cmSlj6Ho7pNKpcI4h9KuXr0qtM6U7h5MTU0FSgKnpi4rCwsLjQOmK2PAgAGwsLBAQUEBzpw5I7yuPJ+Oth/Giu4Ga2trrFixQu0YsQULFgh/oP38/FTudiosLMShQ4eAkhZE5ZtKFIyNjfHtt99qbG0S672rK1dXV6xevVqo64sXL3D06FGsWLECo0aNQrdu3TBjxgz4+vpqNRygslatWiX8Hy5duhT9+
vVTWb9v3z5kZ2fD1tYWX3/9dbldMgsWLICtrS1kMpnGAcpiCQ4OxogRI8r9t3HjRrX79erVSyVcKSssLBTGOrZp00bttBcSiQRff/210K1WUfdZeeOnxo4dK4x7TU9Px/Lly9W+B5XDz/Xr1zUeqzIU1+vNmzeFQImSL3AFBQWwsLCoVG+BmGxsbNC7d2+g1JxYynPyiRX+GLBqoM2bNwuTV7Zr1w6dO3dWGUjasGFD/Pzzz8I3+pCQEEilUpiampb5ACtNMW4kJSVF7SB5lPwBqcwg3mHDhpUbTurVqyeU9fDhw0pNkKr4puPo6FjhIzoU5xUZGYni4uIy65W7/3bu3InPPvtM+H3OmzevUl1xr6p79+5o2bIlUM4f08LCQuEC9/DweKXm844dOwrhobxbkRXHqFu3LgYOHKiyTjFO7tGjR7h9+3alj/8q6tSpI4QM5T+eiuWWLVtq7FJVSE5OFibJHDZsWLmD/I2MjITWwaysLNy5c0dYd+fOHWRlZQElMzeXN4i/SZMmGgfAivne1ZWXlxeOHTuGMWPGlOkeVAwfWLFiBQYPHowLFy6IdtwdO3YI7/OxY8figw8+KLONoqv53Xff1dhCaWRkJLwHKuomrk6lv7Aoi4yMxIsXL4CS91Z5LYfm5ubCZ+ajR49UxgEqk0gk5X5O161bV5jSx9LSEu+8847a7RwcHIT3hHIQ0lXnzp2F46u7pocOHarzjU+6UHx5/euvv4Tu7evXryMhIQEGBgYa/x8rg12EesTe3h5DhgzBzJkzVeZgiYyMBEr65CvTxJuamqq2laply5aVevNX1JTq6uoqfBN78OCB1oPdFef1+PFjrZ+BVlRUhKysLLVz1Hz77bcYOXIknj9/LnSRde3aVW0rRVUZN24cfvzxR1y5cgUJCQkqg6zPnj0r3FFTmcHtpY0YMQL/+9//cPv2bTx58kT4oENJiAsMDARKQlzpEDJ8+HBs27YNhYWFmDRpEt555x24u7uja9euaNOmTZVNbDt69GgcPHgQoaGhSExMRNOmTYVvl9q25CmPiVK+21Yd5e7uhw8fonPnzkCpMY4VhW4XFxdhAH5pYr93deXk5ITvvvsOq1evRmRkJG7duoXIyEhcu3YNSUlJQMldfHPmzMH27dt1vmPv/Pnz+PHHH4GSbmV1d8JKpVLhhpwDBw7gwIEDWpWtaGWtSq/yWBSoubNYmfL7s7zhFsrrFS3+Dx8+VBlvq9CgQQONg+AVd402b95c43Vbv3595ObmIjc3V2OdKmvkyJHYtGkTAgICMH/+fCQmJgpfanWd/kBXgwcPxtdff428vDwEBgZi5MiROHLkCFAy9KZp06aiHIctWDXQpEmTEBAQINxmHhgYiOvXryM4OBhLly4t8wGclpb2Sscpb2BqZW/nrugPgvJ6ReuANtLT0ytVD4Xyzsva2hqLFy8WfjY2NsYPP/yg8xiUyvD09ISxsTHkcjn8/f1V1im6pmxtbfH222+/8jGUv32VfgzN+fPnhW/R6r6ltWrVCj/99BMsLS1RXFyMc+fO4euvv8aIESPQu3dvLFmyRNSuBIVu3brBzs5OaKa/efMmnjx5AolEonXAUn5vVfSeVO7+VL5NXLmMiloQNXWhiv3eFYuxsTE6d+6MDz74AD/++CNCQkKwa9cu4Q4yqVSKb775BnK5/JWP8fDhQyxevBgymQx2dnbYtGmT2taprKysV2qxKygoeOW6VTXl8WWlvep7q7zPzIqGNCi68LXdTuzH1ii62WJjYxEeHo5jx45BLpfDzs4OXbt2FfVYlWVqaorBgwcDJa1qL1++FIYniDk2jC1YNZBiJndtKaZraNCgAfbs2aP1furmokHJnWWVUVWtGorzat++PTZs2KD1fuU9AqG4uFhlHFhRURFCQ0PL/T1UhUaNGgnzxPj7+2PevHmQSCRITk7G33//DZR8u9Ml9Dk4OKBz5864efOm8O1RQdFtaGVlVW63wZAhQ9CnTx+cP
HkSly5dwvXr15Geno6MjAwcO3YMx44dg6enJ9atWyfaOCyJRIIRI0bg119/xdGjR4Xu6y5dumgcRK2pPDHq9KrEfu9Wpd69e2Pnzp0YMWIEMjMzERsbi7t3777SgOf09HTMmTMHubm5MDMzwy+//FJumFCeZsbLy6vch92XJvadlmLS9np4Ex5x5uDggC5duiAsLAxHjx7F1atXgZKWrZpw/qNGjcKRI0dw+fJl7N+/X7hxRxG8xMCAVQsomolzc3PRqlWr19oiAy2a7JVb2DR9wytNcV55eXmVCpzl+fnnn4VxRebm5sjJycHatWvRvXv3V/oj/qq8vLwQGBiI+Ph4hIaGomfPnjh69KjwB6eydw+qM2LECNy8eROxsbGIiIiAi4sLcnJyhC6toUOHavxDZWFhgQkTJmDChAlAybxQwcHB2Lt3L1JSUuDv748OHTpg+vTpOtdVYfTo0fj111/x6NEjYTxIZboSlN9bFb0nldcrd7Mot96mpaVpnC9O0zHEfu9WNRsbG7i7uwtjZJ48eVLpgFVUVIRPP/0U8fHxkEgk+PHHHzV2mSn/f8nlcr34PelC+Xwr896qzGdmTTN69GiEhYXh0KFDwjP9quvuwdJ69eoFW1tbJCcn46effgJKbripaILmymAXYS2g+CAsLCwUxn68ThUdU/k5c5WZzE5xXnFxcZUaHK/OzZs3sXXrVqDk9vjdu3fD2NgYubm5WLp0aZlJW7X1Kt/E3nnnHTRp0gRQGuyu6B7s1q2bMBBeF8OGDRPuyFK0Wp05c0b4kKvsIM5WrVph1qxZ8PX1FaamqGhG+spydHQUxk69fPkSJiYmGDp0qNb7K7+3Khqgr7xeeT/lP/Kano+ICt73Yr53XxflcT6v8r7+5ptvcO3aNQDAokWLKnyOm4mJifC7Lz0pbG2k/D67deuWxm3Le3/qm2HDhsHExET43HnrrbcqnOT6dVEezF5V4Y8Bqxbo37+/8IG4e/fu137806dPlzsuIi8vT/hD3Lp1a7WDNcujuOtPLpdXquuztNzcXCxZsgRSqRRWVlZYt24dOnXqhE8//RQo+XAv7zE6FVG+GUDTw02VGRgYCK1UZ86cwYULF4RZ1XUZ3K7M2tpauMvtxIkTKg841WUMRNOmTYUAmJGRIUpdlY0ePRomJiYwMTHBoEGDKjUe0NbWVphE9tSpU+UO2pVKpcL4N0tLS5V5ijp16iS0GBw9erTcsUjJycka58ES672rq8qMpVIOjJVt0d21axf8/PyAkvCuPHeYJsoP/K7tz1bt1KmT8H4+cuRIuWOecnJyXvkzs6apX78+Bg4cKFzT1T24vbRRo0YJdWvatKnOj8YpjQGrFnBychK+6Z84cUJlMlJ14uLiKnyaeGU8f/683Dtu1q9fL3QRlp7RtyJvv/220KKxY8cO4c6/8ty/f1/tDONr164VupzWrFkjjHP56KOPhMlaf/7551dq/bOyshK62p4+far1fmPHjoVEIkF+fj6+/PJLoGRKi8q02FRE8e3s+fPnCAgIEMZAvP/+++W2UAQFBQmD4NVJTEwUHvFRFWPXJk+ejIiICEREROA///nPK+2PkrFAa9euVbvN5s2b8ejRI6Cku1Z5ALbigccAcPfuXWzfvr3M/sXFxfjqq6+ExxmpI9Z7V1fz58+Hj49PhXNcHT58GJcvXwYANGvWrFLdgxcuXMAPP/wAlNy9+e2332q977Rp04QW0WXLllU4O/758+fLPApMUX/FtDabNm3S+vivk4mJCcaNGweU3K26ZcuWMtvI5XKsWbNG+PJS2YmZa6KNGzcK13R5k6ZWl7Zt2wp1O3/+vOjDazgGq5b4+uuvERkZibi4OKxfvx7BwcEYNWoU2rRpAxMTE2RmZuLevXu4ePEirly5gkGDBuH9998X5didOnXCvn37EB8fj4kTJ6Jp06ZITEzEvn37hG/5zs7OmDhxYqXL/umnn+Dl5YXMzEwsWrQIx44dw3vvvYeWL
VvCwMAAaWlpuHv3Ls6dO4fw8HDMmDFDZQK7wMBAoRtuzJgxKgMYDQwM8P3332PkyJHIzs7G559/jiNHjpT7MGx1jIyM4OLiIowzcHZ2RocOHYTuOUtLS7W3Utvb26NPnz74+++/hS6kYcOGqX0U0asaMGAAzMzMkJeXh7Vr1wrdoJq6B3fv3o3PP/8c7u7uwmOFLCwskJWVhcjISPzxxx9Ca2VlA/PrMHHiROEuxMOHD+PZs2fw9vaGvb09nj9/jkOHDgmTvDZv3hxz584tU8a8efNw6tQpJCUl4ccff8S9e/cwatQolUflREREoFOnThpDua7vXTEkJiZi9erV+PHHH+Hh4YFu3brB0dERlpaWePnyJWJiYnD69GmEhIQAJV2Dy5Yt07qLMCsrC4sXL4ZUKoWZmRkWLlyIJ0+eaNzH0dFR+FLSqFEjfP/99/j000/x/PlzjB07Fp6enujXrx+aNGmC4uJiJCUl4fbt2zhz5gzi4uLw66+/qn2GnD6YN28eAgMDERcXh02bNuHBgwcYM2YMGjdujPj4ePzxxx/CVAadO3cWxkDS/7lx44ZW23l4eNSI524yYNUSVlZW2LdvHxYuXIjr16/j2rVrwngIdcR8HtmiRYvw+++/4+LFi2qb+Z2cnPDrr7++0oMzmzdvjv379+PTTz/FgwcPcO7cOZw7d67c7ZXPKyUlBV999RVQ0uWhWFbWrFkzrFy5EkuWLMHjx4+xfv16tXP2aDJ79mzMmTMHmZmZ+Oyzz1TWzZ8/X+2MzSiZE0tx5yBE7B5UMDMzw4ABAxAQECC0SrVv377CMR35+fnCA3bVMTAwwIIFC8pMUloTGBoa4tdffxUe9nzlyhVcuXKlzHatWrXCb7/9pvY6sLCwwPbt2/Hhhx/i+fPnOH78eJkW3zFjxqB79+4aH6+ky3tXLE2aNMGdO3eQl5en9jyUWVhY4KuvvqrUXVTZ2dnCA4Tz8vIwY8aMCvcJDg5Waf0cPHgwtmzZgmXLliEzMxP79+/H/v371e5rYGCgdtoB5SEKNeEPa3nMzc2xa9cu4WHPZ86cUXl6gUKXLl3wyy+/vPYblmq6gwcPCrPha3LkyJEa8T5gwKpFGjduDB8fH5w/fx7Hjx9HeHg4UlNTUVxcDAsLC7Ro0QKdO3eGh4eHMHu0GIyNjbFt2zYcOHAAR48eRUxMDIqKiuDg4ID33nsPH374YaVahUpzdHTEkSNHcOrUKfz111+IiIhAenq6MKbK0dERXbt2xaBBg4TxNHK5XPjANjQ0xIYNG8r9AzZy5EiEhITg+PHj2LdvH/r37w93d3et6/fuu+9i165d2LNnDyIiIpCRkaGx+0hh4MCBQguTk5MTunTpUonfinZGjBihMhdWRYPbf/rpJ5w/fx5Xr15FdHQ0UlNTkZGRARMTE9jZ2aFbt26YOHFijW5BsLKygo+PD44dO4bjx4/j7t27yMrKQr169dC2bVsMHTq0TNdgaW3atMHx48fx22+/ISgoCM+ePRP2Hz9+PN5//32tHm3zKu9dMW3ZsgUxMTG4dOkSwsLC8OjRIyQlJSEvLw916tSBlZUV2rRpg759+2LEiBHV8vBdlLQ4BAcHw9fXFyEhIXj06BGysrJgaGiIRo0aoU2bNujVqxeGDBmidhJIxSPELC0tNT7kvCawt7fH0aNH4efnh9OnT+PBgwfIzc2FpaUlOnToIDyOR6wpUKj6SOS6zChHb6yrV68K89bs2bMHPXv2rO4q6Z3Y2FjhETGff/45Pv744+quEpFe8vDwQEJCAhYsWKAy7xtRdWJEJqomiqkZjIyMatzdNUT6IiEhAQkJCbCwsBB1XjYiXTFgEVWDFy9ewNfXFygZjK7t8xmJSJVirOnUqVNhYWFR3dUhEnAMFtFrkpaWhpycHKSkpGDTpk3IzMyERCLRes4gIipr9OjRbAGmGokBi+g12bBhQ5kHPHt7e1fJ4GYiIqpeDFhEr5mxsTGaN2+O8ePHY8qUK
dVdHSIiqgK8i5CIiIhIZGzB0hMymQzFxcUwMDB4pQexEhERkXpyuRwymQxGRkaizUHGgKUniouLERERUd3VICIiqrVcXFw0TkJcGQxYekKRqF1cXPj4BCIiIhFJpVJERESIOoM+A5aeUHQLGhoaMmARERFVATGH4HCiUSIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJTG8flTN16lSEhoZWap89e/agZ8+eateFhITA19cXERERSE9Ph7W1NVxcXDB+/Hi4u7trVX5xcTH8/PwQEBCAmJgY5OXlwcbGBn369MHUqVPRpk2bStW3KsjlchQVFUEmk1V3VUhPGBgYwNjYWNRHSBAR1XZ6G7Aqy8DAAC1btizzukwmw4oVK3Dw4EGV15OTk5GcnIygoCB4eXlh9erVGh8CmZ6ejlmzZiEiIkLl9bi4OBw4cAD+/v5YuXIlvLy8RDwr7eXl5SErKwvZ2dmQSqXVUgfSX4aGhrCwsIClpSXMzMyquzpERDWe3gasdevWIT8/X+M2jx49wqJFiwAAvXv3hq2tbZltNm7cKIQrZ2dnfPTRR3BwcEBcXBy2b9+OqKgo+Pn5wdraGosXL1Z7HKlUivnz5wvhavDgwfDy8oKVlRVu3bqFX375BWlpaVi5ciVsbGy0bhETS3Z2NuLj42FsbAwrKyvUq1cPBgYGbJGgCsnlcshkMuTm5uLFixfIzMyEvb09LCwsqrtqREQ1mt4GLAcHhwq3OXr0qLA8atSoMusfP36MnTt3AgA6deoEHx8f1K1bFwDg6uoKDw8PTJkyBZGRkdixYwfGjh2LFi1alCnH398fN27cAAB4e3tj1apVwjpXV1f069cPY8aMQU5ODr799lv07dsXRkav51efl5eH+Ph41K9fH82aNWOooldSr149NG7cGM+ePUN8fDxatGjBliwiIg1q7SB3mUyGgIAAAICZmRkGDx5cZpvdu3ejuLgYALBixQohXCmYmppixYoVQMn4ql27dqk9liKkWVlZYenSpWXWt2jRArNnzwYAPHnyBIGBgTqfn7aysrJgbGzMcEU6k0gkaNasGYyNjZGVlVXd1SEiqtFqbcC6fPkykpOTAQBDhw6Fqampynq5XI7g4GAAgJOTE9zc3NSW4+bmBkdHRwBAcHAw5HK5yvrHjx8jOjq63OMoeHp6CstBQUE6nZu25HI5srOzUb9+fYYrEoVEIkH9+vWRnZ1d5logIqL/X60NWEeOHBGW1XUPxsfHIyUlBQDQvXt3jWX16NEDKBn4Hh8fr7JO0TWovJ06jRs3FgbZh4WFaX0euigqKoJUKkW9evVey/HozWBmZgapVIqioqLqrgoRUY1VKwNWbm6u0EpkZ2endmqGR48eCctOTk4ay1NeHxMTo7JO0XpVmXISExORl5dX4XnoSjEVg6a7H4kqy9DQEFB6fxERUVm18i/vX3/9JQSYESNGqO0eS0pKEpabNGmisTzl9YmJieWWo+4uRWVNmzYFSrrulPerauweJDHx/UREVDG9vYtQE+XuwdGjR6vdJjc3V1iu6G4o5XFVpVuelMupqCtOUznaqswcVlKpFHK5XPhHJAbF+0kqlXJONSKqFaris6zWBaykpCRhhnflAeqlvXz5Ulg2NjbWWKaJiYmwXFBQUCXlaKv0RKYVMTIyQn5+PrtzSDQvX75EUVER7t27V91VISKqsWpdwDp27JgQJsprvQKAOnXqCMsVDdYtLCwUlktP5VC6HOWfK1OOtlxcXIQxMBUpKCjAkydPYGpq+srHIypN8eic1q1b831FRLWCVCqtdANGRWpdwFJMLmpiYoL33nuv3O2Uu/Mq6q5TnjG+dHeicjm5ubkaA5amcrRlaGiodcAyNDSERCIR/hGJQfF+qsx7kYjoTVOrAlZERIRwd2D//v1haWlZ7rbKA9crGnCuvF4xUF1dO
cnJybC2ti63HMUAeYlEUuHAetJPV69exbRp0wAA8+fPx4IFC1TWb9q0CZs3b9bpGJ6enli/fj2g4aHnBgYGsLCwgL29Pbp06YIJEybUiIeNExG9KWrVXYTKj8bR1D0IAK1btxaWS0+9UJry+tJTMbRq1arS5TRt2pSPGaEqJZPJkJWVhTt37mDv3r0YNWoUtm3bVt3VIiJ6Y9SaFqyioiKcOHECAGBtbY1+/fpp3N7e3h42NjZISUnBtWvXNG6rWG9rawt7e3uVdV27dhWWQ0NDMXz4cLVlPH/+HLGxsQCALl26aHlWr59MLoOBpFblbkFNODdvb28MGTJE7brg4GD897//BQAsXLgQAwYMULtdeS2zikdDoeR6iIuLQ1BQEAICAiCVSvHTTz/BwcEBw4YNE+VciIiofLUmYF24cAHp6ekAgPfff7/ChylLJBIMGDAA+/btQ0xMDMLDw9U+Lic8PFxoeRowYECZsUyOjo5o1aoVoqOjcfr0aXzxxRdqH5fj7+8vLA8cOPCVz7OqGUgM4HPrIpJzatez5mzNLTH5rXequxpo2LAhGjZsqHZdZGSksGxra4u2bdtWquzS23fs2BFDhw7FW2+9hbVr1wIAfv75ZwYsIqLXoNYErMp0DypMnz4dvr6+kEqlWLNmDXx8fFTuiiooKMCaNWuAkukOpk+frracGTNmYPny5cjMzMSGDRuwcuVKlfVPnz7F1q1bgZIHPw8aNOiVzvF1Sc7JQsKL9OquBolk8uTJ2LlzJ549e4aHDx/i+fPnaNy4cXVXi4ioVqsVfUFZWVk4d+4cUPItvmPHjlrt5+joiJkzZwIlrQeTJk3CyZMnERERgZMnT2LSpElCq8LMmTOFZwmW5unpKXT7+fj44NNPP8XFixdx+/Zt/PHHH5g4cSJycnJgYGCA5cuXV9i6RiQmAwMDlTGHpZ9GQERE4qsVf+lPnjwpzDGl7sHOmixatAhpaWk4dOgQoqKisGjRojLbjBs3DgsXLiy3DENDQ/z888+YNWsWIiIicObMGZw5c0ZlGxMTE6xcuRLu7u6Vqh+RGJQnwa1oQlwi0kwuk0HCZ7y+dvr2e68VAUvRPWhoaIgRI0ZUal8DAwOsW7cOQ4YMwYEDBxAREYGMjAw0aNAALi4umDBhglahyNraGvv374evry+OHz+O6Oho5Ofnw8bGBr1798a0adN4mzxVG+WHkjdr1qxa60Kk7yQGBsgK2gdpRkp1V+WNYdjABpYDJ1V3NSqlVgSs/fv361yGu7u7zq1LRkZG8Pb2hre3t871IRLLX3/9JdzB2rt3b43zwxGRdqQZKShOTajualANVisCFhGpKiwsFKZp+OWXX4CSh42r6wInIiLxMWAR1RLt2rUrd13Hjh2xfPlyvPXWW6+1TkREbyr9GS1GRK/E2NgYY8eOVZkUl4iIqhZbsIhqCeWZ3F+8eIH79+9j165dePr0KVavXo38/Hx89NFH1VpHIqI3BQMWUS1Reib3bt26YdSoUfD29sb9+/exceNG9OjRA66urtVWRyKiNwW7CIlqMXNzc/zwww8wMDBAcXExvv/+++quEhHRG4EBi6iWa9++Pd5//30AwPXr13HhwoXqrhIRUa3HgEX0BpgzZw4MSmZAVkzbQEREVYcBi+gN0KpVK+Eh42FhYbhy5Up1V4mIqFbjIHeiKnL37l0cPny4wu169er1Wh5fM2fOHOEZmb/88gt69epV5cckInpTMWARVZHg4GAEBwdXuN3PP//8WgKWs7Mz3N3dERISgitXriA8PBxubm5VflwiojcRAxaVYWte+55VVxvP6VXMmTMHISEhAIAtW7Zg27Zt1V0lIqJaiQGLVMjkMkx+653qrkaVkMllMJBU7bDDnj174v79+6+075gxYzBmzJhK7bN3795Kbd+lS5dXrh8REWmPg9xJRVUHkOpUm8+NiIhqFv7FISIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiI
pExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhEZlTdFSCqTa5evYpp06apXVe3bl1YWVmhffv2GDRoEEaOHAkTExMAgIeHBxISEnQ69p49e9CzZ0/Ex8djwIABAABPT0+sX79ep3KJiKjy2IJF9JoUFBQgKSkJ58+fx/LlyzFmzBjEx8dXd7WIiKgKsAWLVMhlMkgMamfuft3nNmnSJHh7ews/p6Wl4eHDh9ixYweSkpLw8OFDfPLJJzhy5Ah27NiBoqIiteUsW7YMkZGRAICAgIByj2dvb18FZ0FERK+CAYtUSAwMkBW0D9KMlOquiqgMG9jAcuCk13rMhg0bom3btiqv9e7dG2PGjMHIkSORkJCABw8eIDAwEEOHDi23HDMzM2G5dHlERFQzMWBRGdKMFBSn6jYeiMpnbm6OTz75BF999RUA4J9//tEYsIiISP/Uzr4gohquXbt2wnJSUlK11oWIiMTHgEVUDYyNjYVlIyM2JBMR1TYMWETVIDo6Wli2s7Or1roQEZH4GLCIXjOpVIodO3YIPw8ZMqRa60NEROJjwCJ6TdLT03H58mVMmTIFUVFRQEm46tatW3VXjYiIRMbBH0RVZPPmzdi8ebPadaamppg4cSI+++yz114vIiKqemzBIqoG7du3x9SpU1UGuxMRUe3BFiyiKqI8k7tUKkVSUhLOnDmDo0eP4ubNm5g6dSoOHjwIa2vr6q4qERGJjC1YRFVEMZN727Zt0aFDB/Tv3x/r16/HunXrAAAJCQlYvnx5dVeTiIiqAAMW0Wvm6ekp3Dl49uxZXL58ubqrREREImPAIqoGixYtgqGhIQBg48aN1V0dIiISGQMWUTVwdHTEsGHDAAC3bt3C33//Xd1VIiIiETFgEVWT2bNnQyKRAAB++eWX6q4OERGJiHcRElWTtm3bwsPDA8HBwbh27RquX78u+qSjT548weHDhyvcztXVFa1btxb12EREbzIGLKJqNGfOHAQHBwMlrVjKj9ARQ1hYGMLCwircbtmyZQxYREQiYsCiMgwb2FR3FURXU8/J1dUVffv2xd9//41Lly7h9u3bcHV1re5qERGRjhiwSIVcJoPlwEnVXY0qIZfJIDGo2mGHPXv2xP379yu1z86dOzWu37t3b6XKs7e3r3QdiIhIXBzkTiqqOoBUp9p8bkREVLPwLw4RERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREekgml1V3FYhIA040SkSkhwwkBvC5dRHJOVnVXZU3SvvGzfBe2y7VXQ3SAwxYRER6KjknCwkv0qu7Gm8Um3r1q7sKpCfYRUhEREQkMgasN4BcLq/uKlAtwvcTEVHFGLBqMUNDQwBAcXFxdVeFahHF+0nx/iIiorIYsGoxIyMj1KlTB1lZHARL4snKykKdOnVgZMQhnERE5WHAqsUkEgmsrKyQnZ2NjIyM6q4O1QIZGRnIzs6GlZUVJBJJdVeHiKjGqlVfQZ89e4aDBw/i/PnzePbsGXJzc2FtbQ07Ozv07NkTw4YNQ9u2bcvdPyQkBL6+voiIiEB6ejqsra3h4uKC8ePHw93dXas6FBcXw8/PDwEBAYiJiUFeXh5sbGzQp08fTJ06FW3atBHxjCvWoEEDFBYWIikpCS9evIC5uTnq1q0LAwMD/oGkCsnlcshkMhQUFCAnJwd5eXlo0KABGjRoUN1VIyKq0WpNwNq7dy/+85//IC8vT+X1pKQkJCUl4caNG8jJycHy5cvL7CuTybBixQocPHhQ5fXk5GQkJycjKCgIXl5eWL16NQwMym/0S09Px6xZsxAREaHyelxcHA4cOAB/f3+sXLkSXl5eOp+vtiQSCZo0aQJTU1O8ePECqampkMk4QSFVjoGBA
czMzNCsWTNYWlpWd3WIiGq8WhGwtmzZgv/9738AgJYtW2L8+PFwcXGBhYUFMjMzERUVhcDAwHLD0caNG4Vw5ezsjI8++ggODg6Ii4vD9u3bERUVBT8/P1hbW2Px4sVqy5BKpZg/f74QrgYPHgwvLy9YWVnh1q1b+OWXX5CWloaVK1fCxsZG6xYxsVhaWsLS0hIymQzFxcUMWaQ1AwMDGBkZafxyQUREqvQ+YF2+fFkIV6NHj8batWthbGyssk3v3r0xc+ZMFBYWltn/8ePH2LlzJwCgU6dO8PHxQd26dQEArq6u8PDwwJQpUxAZGYkdO3Zg7NixaNGiRZly/P39cePGDQCAt7c3Vq1aJaxzdXVFv379MGbMGOTk5ODbb79F3759q2WQsIGBAUxMTF77cYmIiN4kev2VVCaT4euvvwYAtG/fHt9++22ZcKVMXbDYvXu3cNv5ihUrhHClYGpqihUrVgAl46t27dqltmxFSLOyssLSpUvLrG/RogVmz54NAHjy5AkCAwMrcaZERESkT/Q6YF26dAmxsbEAgI8//rjSLUJyuRzBwcEAACcnJ7i5uandzs3NDY6OjgCA4ODgMhMtPn78GNHR0QCAoUOHwtTUVG05np6ewnJQUFCl6kpERET6Q68D1unTp4GSgdzvvvuu8HpmZiZiY2ORmZmpcf/4+HikpKQAALp3765x2x49egAlA9/j4+NV1im6BpW3U6dx48Zo2bIlACAsLEzj8YiIiEh/6fUYrFu3bgEA7OzsYG5ujoCAAGzbtg0PHjwQtlEMep86dWqZLsJHjx4Jy05OThqPpbw+JiYGDg4Ows+K1itty4mNjUViYiLy8vJgZmam1bkSERGR/tDbFiyZTIaYmBigZK6ntWvX4vPPP1cJVwAQGxuLH374AdOmTcOLFy9U1iUlJQnLTZo00Xg85fWJiYnllmNra6uxnKZNmwIl3ZPK+xEREVHtobctWNnZ2cJUAw8ePEBERAQaN26MpUuXwt3dHXXq1EFERAR+/PFHhIeH4+bNm/jyyy+xefNmoYzc3FxhuaKWJOVxVaXn2lIup169eq9cjjakUmml9yGi2ofPgqQ3UVX9DayKcvU2YOXn5wvLL1++hKmpKfbs2aPSRde9e3fs3r0bEyZMwL179xAYGIhbt27hrbfeEvZT0HT3IUrdgVhQUKCyTqxytFF6ElMievOYmprC2dm5uqtB9Nrdv39f5e9/Taa3Aav0eKpx48apHf9Ut25dLFq0SJgi4eTJk0LAqlOnjrBdUVGRxuMpz6FVeiqH0uUo/1yZcrTh4uLCb65ERPRGateuXZWUK5VKRW/A0NuAZW5urvLz22+/Xe62vXv3hpGREYqLi1V+gcrdeRV11ykn5tLdicrl5ObmagxYmsrRhqGhIQMWERG9kfTp75/eDnI3MTGBtbW18LOmQep16tQRHk6bnp6udp+KBpwrr1cMVFdXTnJyssZyFAPkFc8IJCIiotpHbwMWALRu3VpYrujZeooBbMqTkSrvr7gjsTzK60t3RbZq1arS5TRt2pRTNBAREdVSeh2wlCcHjYuLK3e7nJwcZGRkAKWmUbC3t4eNjQ0A4Nq1axqPpVhva2sLe3t7lXVdu3YVlkNDQ8st4/nz58LM8126dNF4PCIiItJfeh2wBg8eLCxrerZfYGCg8Hgb5TAkkUgwYMAAoKRlKTw8XO3+4eHhQsvTgAEDIJFIVNY7OjoKrVinT58u9w4Hf39/YXngwIFanSMRERHpH70OWO3bt0e/fv0AACdOnMDly5fLbPP8+XP897//BUqmUBg7dqzK+unTpwuD5tasWVNm6oSCggKsWbMGKOlenD59utq6zJgxAyh5TM+GDRvKrH/69Cm2bt0KlDz4edCgQa90zkRERFTz6XXAAoAvv/wS9evXh0wmw+zZs/HTTz/h+vXriIiIgI+PD8aNGycMUP/Xv/5VZqZ1R0dHzJw5EwAQGRmJSZMm4eTJk4iIiMDJkycxa
dIkREZGAgBmzpwpPEuwNE9PT6Hbz8fHB59++ikuXryI27dv448//sDEiRORk5MDAwMDLF++vNIPpiYiIiL9IZEr+s702PXr1/Gvf/0LqampatdLJBLMmTMHCxcuVLteJpPhq6++wqFDh8o9xrhx47BmzRoYGJSfSdPT0zFr1qxy59IwMTHBypUr4eXlVeE5lSaVShEeHg43Nze9uk2ViKrOf/4+joQX6VpsSWLp3LQlprj1Q7rf/1CcmlDd1XljGDWyg7XXv6qs/Kr4G1srmlG6deuG48eP448//kBQUBDi4+NRVFSExo0bo0ePHpg6darGWY8NDAywbt06DBkyBAcOHEBERAQyMjLQoEEDuLi4YMKECXB3d6+wHtbW1ti/fz98fX1x/PhxREdHIz8/HzY2NujduzemTZuGNm3aiHz2REREVNPUioCFkgc+L1iwAAsWLHjlMtzd3bUKUpoYGRnB29sb3t7eOpVDRERE+kvvx2ARERER1TQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARVTN5DJZdVfhjcXfPRFVFaPqrgDRm05iYICsoH2QZqRUd1XeKIYNbGA5cFJ1V4OIaikGLKIaQJqRguLUhOquBhERiYRdhEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRGVV3BXTRrl07rbbr0aMH9u7dq3GbkJAQ+Pr6IiIiAunp6bC2toaLiwvGjx8Pd3d3rY5TXFwMPz8/BAQEICYmBnl5ebCxsUGfPn0wdepUtGnTRqtyiIiISL/pdcASg0wmw4oVK3Dw4EGV15OTk5GcnIygoCB4eXlh9erVMDAov8EvPT0ds2bNQkREhMrrcXFxOHDgAPz9/bFy5Up4eXlV2bkQERFRzVArAtakSZPg7e1d7npTU9Ny123cuFEIV87Ozvjoo4/g4OCAuLg4bN++HVFRUfDz84O1tTUWL16stgypVIr58+cL4Wrw4MHw8vKClZUVbt26hV9++QVpaWlYuXIlbGxstG4RIyIiIv1UKwJWw4YN0bZt20rv9/jxY+zcuRMA0KlTJ/j4+KBu3boAAFdXV3h4eGDKlCmIjIzEjh07MHbsWLRo0aJMOf7+/rhx4wYAwNvbG6tWrRLWubq6ol+/fhgzZgxycnLw7bffom/fvjAyqhW/eiIiIlLjjR7kvnv3bhQXFwMAVqxYIYQrBVNTU6xYsQIoGV+1a9cuteUoQpqVlRWWLl1aZn2LFi0we/ZsAMCTJ08QGBgo+rkQERFRzfHGBiy5XI7g4GAAgJOTE9zc3NRu5+bmBkdHRwBAcHAw5HK5yvrHjx8jOjoaADB06NByuyM9PT2F5aCgINHOg4iIiGqeNzZgxcfHIyUlBQDQvXt3jdv26NEDKBn4Hh8fr7JO0TWovJ06jRs3RsuWLQEAYWFhO
tWdiIiIarZaMRDo9OnTOHXqFBISEmBgYIDGjRujc+fO8PT0RK9evdTu8+jRI2HZyclJY/nK62NiYuDg4CD8rGi90rac2NhYJCYmIi8vD2ZmZlqdHxEREemXWhGwlMMSSsY5PXnyBEeOHMHAgQOxfv16WFhYqGyTlJQkLDdp0kRj+crrExMTyy3H1tZWYzlNmzYFSronk5KSKgxkREREpJ/0OmCZmprCw8MDvXv3hqOjI+rVq4f09HSEhoZi//79yMzMRFBQEObOnYudO3fC2NhY2Dc3N1dYrqglSXlcVV5enso65XLq1av3yuVoSyqVvtJ+VHMZGhpWdxXeaPp6TfF9Q2+iqrpeq6JcvQ5YFy5cQP369cu83rdvX0ydOhUff/wxoqKiEBoain379mHatGnCNi9fvhSWlYOXOiYmJsJyQUGByjqxytFW6YlMSb+ZmprC2dm5uqvxRrt//z7y8/OruxqVwvcNvan06XrV64ClLlwpNGrUCP/v//0/DBs2DEVFRfjjjz9UAladOnWE5aKiIo3HKSwsFJZLT+VQuhzlnytTjrZcXFz4zZVIRNo+couIql9VXa9SqVT0Bgy9DlgVcXBwQJ8+fRASEoInT54gOTlZGCel3J1XUXedclou3Z2oXE5ubq7GgKWpHG0ZGhoyYBGJiNcTkf7Qp+u11k/T0KpVK2E5OTlZWFYeuK48UF0d5fWKgerqylEuXx3FAHmJRFLhwHoiIiLSX7U+YEkkErWvt27dWliOiYnRWIby+tJ3/ikHOG3Ladq0KadoICIiqsVqfcBSnqdKeRoFe3t72NjYAACuXbumsQzFeltbW9jb26us69q1q7AcGhpabhnPnz9HbGwsAKBLly6VPg8iIiLSH7U6YMXFxeHvv/8GADRv3lwlYEkkEgwYMAAoaVkKDw9XW0Z4eLjQ8jRgwIAyLWKOjo5CK9bp06fLvbvB399fWB44cKDO50ZEREQ1l94GrLNnzwoPalYnNTUVn376qXCHoLe3d5ltpk+fLgyYW7NmTZmpEwoKCrBmzRoAgJGREaZPn672WDNmzAAAZGZmYsOGDWXWP336FFu3bgVKHvw8aNCgSpwpERER6Ru9vYtw7dq1KCoqwpAhQ+Dm5gY7OzvUrVsXGRkZuHr1Kg4cOICMjAygpBtv8uTJZcpwdHTEzJkzsW3bNkRGRmLSpEn4+OOP4eDggLi4OPz222+IiooCAMycOVN4lmBpnp6eOHToEMLCwuDj44PU1FR4eXnB0tISt2/fxpYtW5CTkwMDAwMsX74cRkZ6+2snIiIiLej1X/qUlBTs3bsXe/fuLXebIUOGYO3atSqTfCpbtGgR0tLScOjQIURFRWHRokVlthk3bhwWLlxY7jEMDQ3x888/Y9asWYiIiMCZM2dw5swZlW1MTEywcuVKuLu7V+ociYiISP/obcBav349QkNDER4ejri4OGRmZiInJwdmZmZo0qSJ8LDnzp07ayzHwMAA69atw5AhQ3DgwAFEREQgIyMDDRo0gIuLCyZMmKBVKLK2tsb+/fvh6+uL48ePIzo6Gvn5+bCxsUHv3r0xbdo0tGnTRsTfABEREdVUehuwevTogR49eohWnru7u86tS0ZGRvD29lY73ouIiIjeHHo7yJ2IiIiopmLAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZDoFrMLCQvFqQkRERFRL6BSw3nnnHaxduxZ3794Vr0ZEREREek6ngJWVlQUfHx+MGTMGnp6e8PHxQVZWlni1IyIiItJDOgWsQYMGwdDQEHK5HHfv3sXatWvxzjvvYPHixbh06ZJ4tSQiIiLSI0a67Lxp0yZkZGTg2LFj8Pf3x71791BYWIhTp07h1KlTaNKkCTw9PeHp6QkHB
wfxak1ERERUg+l8F2GDBg0wffp0HDlyBIcPH8bkyZNRv359yOVyJCYm4pdffsGQIUMwbdo0HDt2DC9fvhSn5kREREQ1lKjTNDg7O2PFihW4ePEiNm7ciHfeeQcSiQQymQzXrl3Dv//9b7z99ttYtWoVbt++LeahiYiIiGqMKpkHy8TEBMOGDcNvv/2Gc+fOYeHChWjevDnkcjmys7Ph6+uLCRMmYMSIEdi1axcHxhMREVGtUuUTjdra2mL69On4+OOP0ahRI0gkEgCAXC7Hw4cP8f3338Pd3R3fffcdsrOzq7o6RERERFVOp0HuFbl+/ToOHTqEM2fOID8/HygJVpaWlhgyZAgePnyImzdvoqCgAHv27EFgYCD+/PNPNGnSpCqrRURERFSlRA9YSUlJ8Pf3h7+/P+Li4oCSUCWRSNCjRw94eXlh8ODBMDExAQA8fvwY27Ztw5EjR5CYmIj//ve/WL9+vdjVIiIiInptRAlYhYWFCAwMxOHDh3HlyhXIZDLI5XIAQOPGjTFmzBiMGzdO7VQNjo6O+O6772BnZ4fNmzfj8uXLYlSJiIiIqNroFLBu376Nw4cP4+TJk8L4KblcDkNDQ/Tr1w9eXl549913YWBQ8VCvQYMGYfPmzXj+/LkuVSIiIiKqdjoFrPHjx0MikQitVc2bN8fYsWPh6ekJGxubSpVlamoKlAQ0IiIiIn2mcxehsbExBg0aBC8vL/Tq1euVy7G1tcWePXt0rQ4RERFRtdMpYC1fvhwjR46EpaWlzhWpU6cOevTooXM5RERERNVNp4A1depU8WpCREREVEvoNNHogAEDMHDgQDx58kTrfZ49eybsR0RERFQb6dSClZCQAIlEgqKiIq33KS4uFvYjIiIiqo2q/FE5RERERG+a1x6wFPNl1a1b93UfmoiIiOi1eO0B69ixYwAAOzu7131oIiIioteiUmOwpk2bpvb1ZcuWCROFlqewsBDx8fFIS0uDRCJB3759K1dTIiIiIj1RqYAVGhqqMnM7SmZej4iIqNRBHRwcMHv27ErtQ0RERKQvKhWwunfvrvLztWvXIJFI0LFjR40tWBKJBHXq1EHjxo3RuXNnDB8+HGZmZq9eayIiIqIarFIBa+/evSo/t2/fHgCwfv16tG7dWtyaEREREekpnebBGj16NCQSCerXry9ejYiIiIj0nE4Ba/369eLVhIiIiKiW4ESjRERERCJjwCIiIiISmVZdhB06dABK7gaMiooq8/qrKF0WERERUW2hVcBSnvdKm9eJiIiI3mRaBaz58+dX6nUiIiKiNxkDFhEREZHIOMidiIiISGQMWEREREQi0ylg+fv7v9J+L168wOLFi3U5NBEREVGNpVPAWrZsGRYuXIisrCyt97ly5QpGjhyJU6dO6XJoIiIiohpL5y7CM2fOYNSoUbh8+bLG7YqKirB+/XrMmDEDSUlJkEgkuh6aiIiIqEbSKWBNnz4dAJCUlISZM2fi+++/R1FRUZntHjx4gLFjx2L37t2QyWRo3Lgxtm3bpsuhiYiIiGosnbsId+zYARsbG8hkMuzatQvjxo3Dw4cPhW1+//13eHl54eHDh5DL5Rg0aBCOHTuGt99+W4z6ExEREdU4Ws2DpUmfPn0QEBCAr776CoGBgbh//z7GjRuHefPm4Z9//sHVq1chl8thZmaGL7/8EuPGjROn5kREREQ1lCjTNFhaWmLTpk1Yu3YtzMzM8PLlS2zcuFEIV2+99RaOHDnCcEVERERvBFHnwXrvvffQs2dP4We5XA4LCwusW7cOzZs3F/NQRERERDWWaAHr9u3b8PT0xPnz5wEApqamAICcnByMGzcOfn5+Yh2KiIiIqEbTOWDJ5XL8/PPP8Pb2xpMnTyCXy+Hl5YWLFy/iyy+/hImJCfLz87Fy5UrMnz8fGRkZ4tSciIiIqIbSKWDFx8fD29sbmzdvRnFxMaysrLB582asWbMG9erVw7Rp03Do0CG0b98ecrkcwcHBGDFiB
C5evCjeGRARERHVMDoFrJEjRyI8PBxyuRx9+/bFsWPHMHDgQJVtWrduDT8/P8yYMQMSiQSpqamYNWsW1qxZo2vdiYiIiGoknQJWXl4ejI2NVebDUsfY2BhLly7F77//jmbNmkEul+PPP//U5dBERERENZZOAatt27Y4dOiQMKN7RXr27ImjR49i+PDhuhyWiIiIqEbTaaLRgwcPwsTEpFL7WFhY4KeffkL//v11OTQRERFRjaVTC1Zlw5Wy999/X5dDExEREdVYOj8qR9nTp09x8+ZNpKamIj8/H97e3rC2thbzEEREREQ1nigB686dO1i3bh3CwsJUXh86dKhKwPLx8cHmzZthYWGBEydOwNjYWIzDExEREdUoOk80eu7cOUyaNAlhYWGQy+XCP3VGjRqFgoICxMXFCTO+ExEREdU2OgWslJQULF68GIWFhWjdujV+++23Mq1YyszNzeHh4QEAuHDhgi6H1mjDhg1o166d8O/q1asV7hMSEoJ58+ahX79+6NSpE/r164d58+YhJCRE6+MWFxdj37598Pb2Rq9eveDq6oqBAwdi5cqVePjwoY5nRURERPpCpy7CXbt2IT8/H82aNYOPjw/q169f4T49e/bEiRMncOfOHV0OXa67d+9i165dWm8vk8mwYsUKHDx4UOX15ORkJCcnIygoCF5eXli9ejUMDMrPo+np6Zg1axYiIiJUXo+Li8OBAwfg7++PlStXwsvL6xXOioiIiPSJTgHr4sWLkEgkmDFjhlbhCgCcnJyAksfsiE0RloqLi9GwYUOkpaVVuM/GjRuFcOXs7IyPPvoIDg4OiIuLw/bt2xEVFQU/Pz9YW1tj8eLFasuQSqWYP3++EK4GDx4MLy8vWFlZ4datW/jll1+QlpaGlStXwsbGBu7u7iKfOREREdUkOnURPnv2DADg6uqq9T7m5uZAySzwYtuzZw8iIiLg5OSEcePGVbj948ePsXPnTgBAp06dsG/fPgwfPhyurq4YPnw4/vzzT3Tq1AkAsGPHDjx58kRtOf7+/rhx4wYAwNvbG5s2bUK/fv3g6uqKqVOnYt++fTA3N4dMJsO3336L4uJiUc+biIiIahadApZUKgVKWo60lZ2dDQAwMzPT5dBlPHv2DP/73/8AAN98841Wdyju3r1bCDsrVqxA3bp1VdabmppixYoVQMn4qvK6HhUhzcrKCkuXLi2zvkWLFpg9ezYA4MmTJwgMDKz0+REREZH+0ClgNWrUCCgZZ6St27dvAwCaNm2qy6HLWL16NfLy8uDp6YkePXpUuL1cLkdwcDBQ0m3p5uamdjs3Nzc4OjoCAIKDg8vcIfn48WNER0cDJdNSmJqaqi3H09NTWA4KCqrEmREREZG+0SlgdevWDXK5HKdPn9Zq+8LCQhw4cAASiUSrEKStkydP4ty5c+W2IKkTHx+PlJQUAED37t01bquoa3JycpmxY4quQeXt1GncuDFatmwJABrvtCQiIiL9p1PAUrTKnD17Fn///bfGbQsLC/Hvf/8bT58+hUQiwfjx43U5tODFixdYt24dAODzzz/Xeub4R48eCcuKgfflUV4fExOjsk7RelWZchITE6tkDBoRERHVDDrdRdizZ0+89957OHnyJObMmYNp06ZhyJAhwvqEhAS8ePECYWFh8PX1RVxcHCQSCSZOnIg2bdqIUX9s2LABz58/R5cuXbQa2K6QlJQkLDdp0kTjtsrrExMTyy3H1tZWYzmKblG5XI6kpKQKAxkRERHpJ50flbN+/Xrk5uYiJCQEO3fuxM6dOyGRSAAAc+bMEbZTjF0aPHgwli9fruthAQDXr1+Hn58fjIyM8M033wjH1UZubq6wXNGAe+VxVaVbnpTLqVev3iuXoy3FjQVUexgaGlZ3Fd5o+npN8X1Db6Kqul6rolydA5aJiQm2bt0KX19fbN++HU+fPlW7XZMmTTB79mxMmjRJ10MCJV2OK1asgFwux/Tp09G2bdtK7f/y5UthuaI7D
k1MTITlgoKCKilHW6UnMiX9ZmpqCmdn5+quxhvt/v37yM/Pr+5qVArfN/Sm0qfrVZSHPQPA+PHjMX78eDx69AiRkZFIS0uDVCpFgwYN0KFDB3Ts2LFSLUwV2bp1K2JiYtCsWTPMnz+/0vvXqVNHWC4qKtK4bWFhobBceiqH0uUo/1yZcrTl4uLCb65EImrXrl11V4GItFRV16tUKhW9AUO0gKXQunVrtG7dWuxiVURHR2Pr1q0AgK+++uqV5tRS7s6rqLtOOS2XPpZyObm5uRoDlqZytGVoaMiARSQiXk9E+kOfrlfRA9brsHv3bhQVFcHBwQEFBQU4ceJEmW2UH6585coVpKamAgD69+8PMzMzlYHrygPV1VFeX3r+LuVykpOTNd7FqBggL5FIKhxYT0RERPpLLwOWoqstLi6u3OcDKtuyZYuwHBwcDDMzM5VWttJTL5SmvL70nX+tWrVS2a5Dhw4VltO0aVPRZ7InIiKimkOrgHXkyJEqOfjo0aOrpFxt2Nvbw8bGBikpKbh27ZrGbRXrbW1tYW9vr7Kua9euwnJoaCiGDx+utoznz58jNjYWANClSxcRzoCIiIhqKq0C1hdffCHqAHWUdJO9asBav3491q9fr3GbTZs2YfPmzUDJQ6B79uxZ5vgDBgzAvn37EBMTg/DwcLWPywkPDxdangYMGFDm9+Do6IhWrVohOjoap0+fxhdffKH2cTn+/v7C8sCBAyt5xkRERKRPtJ7JXS6Xi/6vuk2fPl0YMLdmzZoyUycUFBRgzZo1AAAjIyNMnz5dbTkzZswAAGRmZmLDhg1l1j99+lQYlN+iRQsMGjRI9HMhIiKimkOrFizFQ5FrG0dHR8ycORPbtm1DZGQkJk2ahI8//hgODg6Ii4vDb7/9hqioKADAzJkzhWcJlubp6YlDhw4hLCwMPj4+SE1NhZeXFywtLXH79m1s2bIFOTk5MDAwwPLly2FkpJdD34iIiEhLWv2lt7Ozq/qaVJNFixYhLS0Nhw4dQlRUFBYtWlRmm3HjxmHhwoXllmFoaIiff/4Zs2bNQkREBM6cOYMzZ86obGNiYoKVK1fC3d29Ss6DiIiIao43vinFwMAA69atw5AhQ3DgwAFEREQgIyMDDRo0gIuLCyZMmKBVKLK2tsb+/fvh6+uL48ePIzo6Gvn5+bCxsUHv3r0xbdo00Z6/SERERDVbrQ1YCxYswIIFC7Te3t3dXefWJSMjI3h7e8Pb21uncoiIiEi/iRqw7ty5g3/++QcPHjxAVlYWAMDS0hJt2rRBnz590KlTJzEPR0RERFQjiRKw7ty5g2+++Ubjc3w2btyITp06YeXKlXBxcRHjsEREREQ1ktbTNJTn9OnTmDhxIiIiIoTpF4yMjNCwYUM0bNgQRkZGwusRERGYNGkSTp06JU7tiYiIiGognVqwYmJisHTpUhQVFcHIyAheXl4YO3YsOnToIMwvJZVKce/ePRw8eBB+fn4oLi7Gv//9b7Rt21blMTNEREREtYVOAeu3335DYWEh6tSpg23btpWZLR0lUxh07NgRHTt2xLBhw/Dxxx+jsLAQ27dvx3fffafL4YmIiIhqJJ26CC9fvgyJRILp06erDVel9ejRA9OnT4dcLsfly5d1OTQRERFRjaVTwEpPTwcA9OvXT+t9FFMhKPYlIiIiqm10CljW1tYAgDp16mi9j4mJCQCgQYMGuhyaiIiIqMbSKWB16dIFADROz1Da7du3AQBdu3bV5dBERERENZZOAeuDDz6AoaEhtm7dqlWXX1paGrZt2wYjIyN88MEHuhyaiIiIqMbSKWC5urrim2++QVpaGry8vBAUFASZTFZmO5lMhqCgIEyYMAHp6en4+uuv4erqqsuhiYiIiGosnaZpWLZsGQCgdevWuHfvHhYsWID69evD2dkZ1tbWkEgkSEtLw927d4VH57Rv3x43btzAjRs31JYpkUiwbt06XapFREREVK10Clj+/v6QSCRASTCSy+XIy
srClStXVLaTy+XCNvfu3cO9e/fUlieXyxmwiIiISO/pFLCaNWsmXk2IiIiIagmdAtbZs2fFqwkRERFRLaHzw56JiIiISJVOLVibN28GALz11lt45513xKoTERERkV7TOWBJJBIhaBERERGRjl2EVlZWAAe7ExEREanQKWC1aNECAPD8+XOx6kNERESk93QKWMOGDYNcLsepU6fEqxERERGRntMpYHl7e6N9+/Y4evQoDh8+LF6tiIiIiPSYToPcU1NTsXbtWixfvhzLly/H8ePH8f7776Ndu3aoX78+DA0NNe7PsVtERERUG+kUsDw8PIRH5cjlcly+fBmXL1/Wal+JRIKoqChdDk9ERERUI+kUsKD0nMHSy0RERERvKp0C1nfffSdeTYiIiIhqCZ0Clqenp3g1ISIiIqol+CxCIiIiIpExYBERERGJTOdB7goymQxXr17FzZs3kZqaivz8fCxatAg2NjbCNoWFhZBKpTA0NISJiYlYhyYiIiKqUUQJWOfOncPatWvx7NkzlddnzpypErD8/Pywdu1amJmZ4eLFizAzMxPj8EREREQ1is5dhL6+vpg7dy4SEhIgl8thZWVV7nQNXl5esLCwQF5eHgIDA3U9NBEREVGNpFPAio2NxerVqwEAvXr1wokTJzRONGpiYoLBgwdDLpfj77//1uXQRERERDWWTgFr165dKC4uRuvWrbFt2za0atWqwn26desGALh7964uhyYiIiKqsXQKWFeuXIFEIsH06dO1HrTevHlzAEBiYqIuhyYiIiKqsXQKWMnJyQCA9u3ba72PYmB7QUGBLocmIiIiqrFEmQerMmEpIyMDAGBubi7GoYmIiIhqHJ0Clq2tLQAgLi5O631u3LgBAHBwcNDl0EREREQ1lk4Bq0ePHpDL5fD399dq++zsbOzfvx8SiQS9evXS5dBERERENZZOAWvixImQSCS4du0aDh8+rHHbjIwMzJ07F6mpqTA0NMTEiRN1OTQRERFRjaXTTO7Ozs6YNm0adu/ejeXLl+PChQsYPHiwsP7mzZu4e/cuwsLCcPz4ceTk5EAikWDu3Lmws7MTo/5ERERENY7Oj8r54osvUFhYiH379uHMmTM4c+YMJBIJAGDlypXCdorZ3adPn465c+fqelgiIiKiGkvnuwglEglWrVqFHTt2oEePHpBIJJDL5Sr/AMDNzQ1bt27FsmXLxKg3ERERUY0lysOeAaBv377o27cvcnJycPfuXaSlpUEmk8HKygrt27eHtbW1WIciIiIiqtF0CliZmZlCiDIw+L/GMHNzc3Tv3l2s+hERERHpnUoHrHv37mHLli34+++/kZeXBwAwNjZGt27dMHPmTPTt27cq6klERESkNyo1BisoKAjjx49HYGAgcnNzhTFWhYWFuHz5Mj766CNs27at6mpLREREpAe0DlgpKSnCHYNyuRx169ZFx44d4ebmhvr16wth67///S/CwsKqttZERERENZjWXYS+vr7CPFYffPABFixYIDy4WSqVYv/+/Vi3bh1kMhl27dqFLl26VGW9iYiIiGosrVuw/vnnH0gkEvTv3x///ve/hXAFAIaGhpg8eTI++ugjyOVyXL58uarqS0RERFTjaR2wYmJiAABjxowpd5uxY8cCAHJycpCamipG/YiIiIj0jtYBKycnBwBgb29f7jbKj7/Jzs7WtW5EREREeknrgFVcXAwAMDIqf9iWoaGhsCyVSnWtGxEREZFe0vlROURERESkqtIBS/EgZ7G2IyIiIqptKj2T+4wZMzR2E2q7nUQiQVBQUGUPT0RERFTjVTpgJScna1yvaLnSdjsiIiKi2kbrgNWsWbOqrQkRERFRLaF1wDp79mzV1oSIiIioluBdhEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkci0fhahOh4eHjAwMMCOH
TvQokULrfZ59uwZpk6dColEgqCgoFc+dk5ODkJCQhAREYHIyEgkJycjPT0dL1++hIWFBVq3bo1+/fph3LhxaNCgQYXlhYWF4c8//8SNGzeQmpqK+vXro3379vD09MT777+vdb2OHz+Ow4cP4/79+3jx4gUaNWqErl27YvLkyejcufMrny8RERHpD50C1rNnzyCRSFBUVKT1PsXFxUhISIBEItHl0Lh9+zYWL16sdl16ejpCQ0MRGhqKHTt2YMOGDXjnnXfKLWvTpk3YsmULZDKZ8FpqaiouXbqES5cuISAgAP/v//0/1KlTp9wyCgoK8OmnnyIkJETl9WfPnuHZs2c4ceIE5s2bh/nz57/S+RIREZH+0ClgVbemTZuiZ8+e6NixI5o2bYrGjRtDJpMhKSkJZ86cQWBgIDIyMvDJJ5/g4MGDaN++fZky9u/fj82bNwMAmjdvjtmzZ6Nt27ZISUnBnj17cPXqVZw/fx5ffvklfvrpp3Lr8uWXXwrhqmfPnpg2bRpsbGzw4MEDbN26FU+fPsWmTZvQuHFjTJgwoQp/K0RERFTdXnvAys7OBgDUrVtXp3J69uyJ8+fPl7v+vffeQ1BQEObNm4eioiJs3rxZCFIKmZmZ+PHHHwEAzZo1w4EDB2BtbS2s79+/P+bNm4dz587h+PHjGD9+PHr27FnmWJcvX8aJEyeEfX7++WcYGhoCAFxdXeHh4YGxY8fi2bNn+PHHHzF06FBYWlrqdP5ERERUc732Qe7Hjh0DANjZ2elUjiLAaDJw4EA4OjoCAK5fv15mvZ+fnxD4Pv/8c5VwpTjG119/LRxrx44dao+zc+dOAICRkZHK9grW1tb4/PPPAQAvXryAn5+flmdJRERE+qhSLVjTpk1T+/qyZctgamqqcd/CwkLEx8cjLS0NEokEffv2rVxNX1G9evUAAC9fviyzLjg4GABgbm6OQYMGqd2/SZMm6N27Ny5duoTLly8jJycH5ubmwvqcnBxcvnwZANC7d280adJEbTmDBg2Cubk5cnJyEBQUhI8++kiU8yMiIqKap1IBKzQ0FBKJBHK5XHhNLpcjIiKiUgd1cHDA7NmzK7XPq4iJicG9e/cAAE5OTirrCgsLcfv2bQCAm5sbTExMyi2nR48euHTpEgoLCxEZGYlevXoJ6yIiIoRB/j169Ci3DBMTE7i5ueHSpUvCPsbGxjqfIxEREdU8lQpY3bt3V/n52rVrkEgk6Nixo8YWLIlEgjp16qBx48bo3Lkzhg8fDjMzs1evtQb5+flITk7GuXPnsH37dhQXFwMApk+frrJdbGwspFIpoCZ8laa8Pjo6WiVgRUdHq91OHUdHR1y6dAnFxcV48uQJWrduXcmzIyIiIn1QqYC1d+9elZ8Vd+WtX7++WsPC4cOHsWzZsnLXz5o1CyNGjFB5LSkpSVgur1tP3Xrl/Ur/bGtrq7Gcpk2bCsuJiYkMWERERLWUTncRjh49GhKJBPXr1xevRiLq0KEDVq9eDVdX1zLrcnNzheWKWtOU1+fl5b1yOcqtfKXL0Zai1Y1qD21u2KCqo6/XFN839Caqquu1KsrVKWCtX79evJroYODAgejUqRNQMuFnXFwcTp06hcDAQHz22Wf48ssv0b9/f5V9lAe9VzQWSnl8VkFBQbnlaBrHVVE52qrseDeq2UxNTeHs7Fzd1Xij3b9/H/n5+dVdjUrh+4beVPp0vb6WebCePn2KjIwM2NnZoVGjRqKXX79+fZVWNFdXVwwfPhxHjhzBF198gblz5+Lbb7/FmDFjhG2UZ2WvaCb6wsJCYbn0/F3K5ShvV9lytOXi4sJvrkQiateuXXVXgYi0VFXXq1QqFb0BQ6eAlZaWhtOnTwMARo4cCQsLC5X1T548waJFi3D37l2gZLD7gAEDsHbt2tcy0ebo0aNx/vx5nDp1CmvWrIGHhwesrKwApekboEV3nfL60t2AlSlHOXW/6iB/Q0NDBiwiEfF6ItIf+nS96jTR6F9//
YU1a9Zgz549ZcJVYWEhPv74Y9y9exdyuRxyuRwymQxBQUGYO3eurvXW2oABA4CS8HPx4kXhdU0D10vTNCBe+efk5GSN5SQmJgrLygPeiYiIqHbRKWD9/fffkEgkaifpPHz4MJ4+fQoA8PDwwPLly9G/f3/I5XKEhYXh5MmTuhxaa8qzsz979kxYbtmypZCEY2JiNJahvL5Vq1Yq65R/rqicx48fAyUzvrdo0ULrcyAiIiL9olPAUgQGNze3MuuOHz8OAOjVqxe2bNmCqVOn4pdffkGfPn0gl8uFZ/dVNeVWJeVuORMTE+HuwvDwcI3jp0JDQ4V9FIPpFVxcXIRB8ort1CksLER4eHiZfYiIiKj20SlgpaenA2rmfyooKEB4eDgkEgnGjx+vsm7s2LEAgKioKF0OrTXFGDEAaNu2rco6RfdhTk4OAgMD1e6flJSk8igc5cfkoOQxO7179wZKHvpcXndjYGAgcnJygJK7HomIiKj20ilgKR6UbGCgWkx4eDiKi4shkUjQp08flXX29vZAyQB5XRw+fFjt8wWV7dq1CyEhIcJxu3XrprLey8tLGDv2008/ISMjQ2W9VCrF119/LcyPMXPmTLXHmTFjBgCguLgY33zzTZn5NNLT0/Hjjz8CJXc8enl5VfJsiYiISJ/odBehmZkZsrOzkZqaqvK6oqusVatWZe4WNDL6v0PqeifA5s2b8f3332Pw4MHo2rUrHBwcUK9ePeTk5ODBgwcICAhAWFgYUDLP1Zo1a8oc08rKCp9//jlWrVqFhIQEjB8/HnPmzEHbtm2RkpKC3bt34+rVqwCA999/Hz179lRbl969e2P48OE4ceIEzp49iw8//BDTp0+HjY0NHjx4gF9//VUY//X555+/ljsoiYiIqProFLCcnJxw69YtXLx4Ee7u7sLrf/31FyQSidqHHyvCmBjzYWVmZsLX1xe+vr7lbtOkSROsW7euTEuawsSJE5GSkoItW7bg6dOn+PLLL8ts4+7ujnXr1mmsy7p165CTk4OQkBBcvXpVCGYKBgYGmDt3LiZMmKD1+b1OMrkMBhKdGjSJiIiohE4By93dHeHh4Thw4ACcnJzQrVs3+Pv749GjR+XeXXjnzh1Ai+f2VWT79u0ICQlBWFgYnjx5grS0NGRmZqJOnTpo2LAhOnTogHfffRfDhg3T+CBqAPj000/x9ttvw8fHBzdu3EBqairq16+P9u3bY8yYMXj//fcrrE/dunWxbds2BAQEwN/fH/fu3cOLFy/QqFEjdO3aFVOmTEHnzp11OueqZCAxgM+ti0jOyaruqrxR2jduhvfadqnuahARkch0ClhTpkzBn3/+iefPn2PNmjUq69zc3NCrV68y+5w7dw4SiQQuLi66HBpOTk5wcnLChx9+qFM5Cl26dEGXLrr/oRsxYkSZB0vri+ScLCS8SK/uarxRbOrVzOd4EhGRbnTqE7KwsMCuXbvg7OwsTCYql8vRrVs3/Pe//y2z/b1794Sp6MvrsiMiIiLSdzo/i7BVq1Y4fPgw4uLikJqaisaNGwt3Cqrz3XffASXzYxERERHVRqI97NnBwQEODg4at2nfvj3at28v1iGJiIiIaiTeNkZEREQkMtFasGQyGa5evYqbN28iNTUV+fn5WLRoEWxsbIRtCgsLIZVKYWhoCBMTE7EOTURERFSjiBKwzp07h7Vr16o8TBklM58rByw/Pz+sXbsWZmZmuHjxosqzAYmIiIhqC527CH19fTF37lwkJCRALpfDysoKcrlc7baKR9Pk5eWV++w/IiIiIn2nU8CKjY3F6tWrgZK7Ak+cOCE8GFkdExMTDB48GHK5HH///bcuhyYiIiKqsXQKWLt27UJxcTFat26Nbdu2oVWrVhXuo3jg8t27d3U5NBEREVGNpVPAunLlCiQSCaZPn671oPXmzZsDABITE3U5NBEREVGNpVPASk5OBkrmt9KWYmB7QUGBLocmIiIiqrFEmQerMmEpIyMDAGBub
i7GoYmIiIhqHJ0Clq2tLQAgLi5O631u3LgBlMz8TkRERFQb6RSwevToAblcDn9/f622z87Oxv79+yGRSPgsQiIiIqq1tA5Y7du3h7OzMx49eiS8NnHiREgkEly7dg2HDx/WuH9GRgbmzp2L1NRUGBoaYuLEibrVnIiIiKiGqtRM7qUnEHV2dsa0adOwe/duLF++HBcuXMDgwYOF9Tdv3sTdu3cRFhaG48ePIycnBxKJBHPnzoWdnZ14Z0FERERUg+j8qJwvvvgChYWF2LdvH86cOYMzZ85AIpEAAFauXClspwhn06dPx9y5c3U9LBEREVGNpfNdhBKJBKtWrcKOHTvQo0cPSCQSyOVylX8A4Obmhq1bt2LZsmVi1JuIiIioxhLlYc8A0LdvX/Tt2xc5OTm4e/cu0tLSIJPJYGVlhfbt28Pa2lqsQxERERHVaKIFLAVzc3N0795d7GKJiIiI9IYoE40SERER0f+v0i1Yy5Ytg6mpqc4Hlkgk2L17t87lEBEREdU0lQ5YkZGROh9ULpcLdxoSERER1TaVDlil58IiIiIiIlWVDljHjx9H69atq6Y2RERERLUAB7kTERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiESm9TQNwcHBAABbW9uqrA8RERGR3tM6YNnZ2VVtTYiIiIhqCXYREhEREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyIyquwK6iIiIQEhICMLCwvDo0SOkp6fD2NgYNjY26NKlC8aOHYtu3bppXV5ISAh8fX0RERGB9PR0WFtbw8XFBePHj4e7u7tWZRQXF8PPzw8BAQGIiYlBXl4ebGxs0KdPH0ydOhVt2rTR4YyJiIhIH+htwJo8eTKuX79e5vWioiLExsYiNjYWhw8fxujRo7FmzRqYmJiUW5ZMJsOKFStw8OBBldeTk5ORnJyMoKAgeHl5YfXq1TAwKL/RLz09HbNmzUJERITK63FxcThw4AD8/f2xcuVKeHl5vdI5ExERkX7Q24CVkpICALCxscHQoUPRrVs3NG3aFDKZDOHh4di5cyeSk5Nx5MgRFBcX46effiq3rI0bNwrhytnZGR999BEcHBwQFxeH7du3IyoqCn5+frC2tsbixYvVliGVSjF//nwhXA0ePBheXl6wsrLCrVu38MsvvyAtLQ0rV66EjY2N1i1iREREpH/0NmA5OTlh0aJFGDJkCAwNDVXWubm5YeTIkZg0aRJiY2Nx/PhxTJw4Ed27dy9TzuPHj7Fz504AQKdOneDj44O6desCAFxdXeHh4YEpU6YgMjISO3bswNixY9GiRYsy5fj7++PGjRsAAG9vb6xatUpY5+rqin79+mHMmDHIycnBt99+i759+8LISG9//URERKSB3g5y37p1K957770y4UrB2toaX3zxhfDzmTNn1G63e/duFBcXAwBWrFghhCsFU1NTrFixAigZX7Vr1y615ShCmpWVFZYuXVpmfYsWLTB79mwAwJMnTxAYGKjlmRIREZG+0duApY2ePXsKy0+fPi2zXi6XIzg4GChpEXNzc1NbjpubGxwdHQEAwcHBkMvlKusfP36M6OhoAMDQoUNhamqqthxPT09hOSgo6JXOiYiIiGq+Wh2wCgsLhWV1g9Pj4+OFsVzqug+V9ejRAygZ+
B4fH6+yTtE1qLydOo0bN0bLli0BAGFhYVqfBxEREemXWh2wrl27Jiy3atWqzPpHjx4Jy05OThrLUl4fExOjsk7RelWZchITE5GXl6dxWyIiItJPtTZgyWQybNu2Tfh52LBhZbZJSkoSlps0aaKxPOX1iYmJ5ZZja2ursZymTZsCJd2TyvsRERFR7VFrb2PbtWsXbt++DZRMmdCpU6cy2+Tm5grLZmZmGstTHldVuuVJuZx69eq9cjnakEqlld5HG+XdLEBU21XVNVXVeM3Sm6iqrteqKLdWBqzQ0FBh3quGDRvi66+/Vrvdy5cvhWVjY2ONZSpPVFpQUFAl5Wij9CSmYjA1NYWzs7Po5RLpg/v37yM/P7+6q1EpvGbpTaVP12utC1gPHz7E/PnzUVxcjDp16uB///sfGjZsqHbbOnXqCMtFRUUay1UeMF96KofS5Sj/XJlytOHi4sJvrkQiateuXXVXgYi0VFXXq1QqFb0Bo1YFrLi4OMyYMQNZWVkwNDTEf/7zH413Byp351XUXaecmEt3JyqXk5ubqzFgaSpHG4aGhgxYRCLi9USkP/Tpeq01g9yTk5Px4YcfIiUlBRKJBOvWrcPAgQM17qM8cL2iAefK6xUD1dWVk5ycrLEcxQB5iURS4cB6IiIi0k+1ImClp6djxowZiIuLA0pmZB89enSF+7Vu3VpYLj31QmnK60tPxaA8BYS25TRt2vSVWrCIiIio5tP7gJWdnY2PPvpImNPqs88+w+TJk7Xa197eHjY2NkCpObPUUay3tbWFvb29yrquXbsKy6GhoeWW8fz5c8TGxgIAunTpolUdiYiISP/odcDKz8/HrFmzcOfOHQDAnDlzMGvWLK33l0gkGDBgAFDSshQeHq52u/DwcKHlacCAAZBIJCrrHR0dhVas06dPl3uHg7+/v7BcUfclERER6S+9DViFhYWYP3++8MiZadOmYdGiRZUuZ/r06cKguTVr1pSZOqGgoABr1qwBABgZGWH69Olqy5kxYwYAIDMzExs2bCiz/unTp9i6dStQ8uDnQYMGVbquREREpB/09i7Czz77DJcuXQIA9OrVC+PGjcODBw/K3d7Y2Fh4YLMyR0dHzJw5E9u2bUNkZCQmTZqEjz/+GA4ODoiLi8Nvv/2GqKgoAMDMmTOFZwmW5unpiUOHDiEsLAw+Pj5ITU2Fl5cXLC0tcfv2bWzZsgU5OTkwMDDA8uXLYWSkt796IiIiqoDe/pX/66+/hOUrV65g5MiRGre3s7PD2bNn1a5btGgR0tLScOjQIURFRaltCRs3bhwWLlxYbvmGhob4+eefMWvWLERERODMmTM4c+aMyjYmJiZYuXIl3N3dtThDIiIi0ld6G7DEZGBggHXr1mHIkCE4cOAAIiIikJGRgQYNGsDFxQUTJkzQKhRZW1tj//798PX1xfHjxxEdHY38/HzY2Nigd+/emDZtGtq0afNazomIiIiqj94GrPv374tepru7u86tS0ZGRvD29oa3t7do9SIiIiL9oreD3ImIiIhqKgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVER
EQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhKZUXVXQBdpaWm4ffs2bt++jYiICERERCAzMxMA4OnpifXr11eqvJCQEPj6+iIiIgLp6emwtraGi4sLxo8fD3d3d63KKC4uhp+fHwICAhATE4O8vDzY2NigT58+mDp1Ktq0afNK50pERET6Q68DVp8+fUQpRyaTYcWKFTh48KDK68nJyUhOTkZQUBC8vLywevVqGBiU3+iXnp6OWbNmISIiQuX1uLg4HDhwAP7+/li5ciW8vLxEqTcRERHVTHodsJQ1a9YMTk5OuHTpUqX33bhxoxCunJ2d8dFHH8HBwQFxcXHYvn07oqKi4OfnB2trayxevFhtGVKpFPPnzxfC1eDBg+Hl5QUrKyvcunULv/zyC9LS0rBy5UrY2Nho3SJGRERE+kevA9a8efPg4uICFxcXNGrUCPHx8RgwYEClynj8+DF27twJAOjUqRN8fHxQt25dAICrqys8PDwwZcoUREZGYseOHRg7dixatGhRphx/f3/cuHEDAODt7Y1Vq1YJ61xdXdGvXz+MGTMGOTk5+Pbbb9G3b18YGen1r5+IiIjKodeD3D/99FP0798fjRo1euUydu/ejeLiYgDAihUrhHClYGpqihUrVgAl46t27dqlthxFSLOyssLSpUvLrG/RogVmz54NAHjy5AkCAwNfuc5ERERUs+l1wNKVXC5HcHAwAMDJyQlubm5qt3Nzc4OjoyMAIDg4GHK5XGX948ePER0dDQAYOnQoTE1N1Zbj6ekpLAcFBYl2HkRERFSzvNEBKz4+HikpKQCA7t27a9y2R48eQMnA9/j4eJV1iq5B5e3Uady4MVq2bAkACAsL06nuREREVHO90QHr0aNHwrKTk5PGbZXXx8TEqKxTtF5VppzExETk5eVVus5ERERU873RASspKUlYbtKkicZtldcnJiaWW46tra3Gcpo2bQqUdE8q70dERES1xxt9G1tubq6wbGZmpnFb5XFVpVuelMupV6/eK5ejDalUWul9tGFoaFgl5RLVdFV1TVU1XrP0Jqqq67Uqyn2jA9bLly+FZWNjY43bmpiYCMsFBQVVUo42Sk9iKgZTU1M4OzuLXi6RPrh//z7y8/OruxqVwmuW3lT6dL2+0QGrTp06wnJRUZHGbQsLC4Xl0lM5lC5H+efKlKMNFxcXfnMlElG7du2quwpEpKWqul6lUqnoDRhvdMBS7s6rqLtOOTGX7k5ULic3N1djwNJUjjYMDQ0ZsIhExOuJSH/o0/X6Rg9yVx64XtGAc+X1ioHq6spJTk7WWI5igLxEIqlwYD0RERHppzc6YLVu3VpYLj31QmnK60tPxdCqVatKl9O0adNXasEiIiKimu+NDlj29vawsbEBAFy7dk3jtor1tra2sLe3V1nXtWtXYTk0NLTcMp4/f47Y2FgAQJcuXXSqOxEREdVcb3TAkkgkwsOhY2JiEB4erna78PBwoeVpwIABkEgkKusdHR2FVqzTp0+Xe4eDv7+/sDxw4EDRzoOIiIhqljc6YAHA9OnThUFza9asKTN1QkFBAdasWQMAMDIywvTp09WWM2PGDABAZmYmNmzYUGb906dPsXXrVqDkwc+DBg0S/VyIiIioZtDruwivX7+Op0+fCj9nZGQIy0+ePMHhw4dVth8zZkyZMhwdHTFz5kxs27YNkZGRmDRpEj7++GM4ODggLi4Ov/32G6KiogAAM2fOFJ4lWJqnpycOHTqEsLAw+Pj4IDU1FV5eXrC0tMTt27exZcsW5OTkwMDAAMuXL4eRkV7/6omIiEgDvf4rf/DgQZVuN2VhYWFlH
qisLmABwKJFi5CWloZDhw4hKioKixYtKrPNuHHjsHDhwnLrYmhoiJ9//hmzZs1CREQEzpw5gzNnzqhsY2JigpUrV8Ld3V3LMyQiIiJ9pNcBSywGBgZYt24dhgwZggMHDiAiIgIZGRlo0KABXFxcMGHCBK1CkbW1Nfbv3w9fX18cP34c0dHRyM/Ph42NDXr37o1p06ahTZs2r+WciIiIqProdcBav3491q9fL1p57u7uOrcuGRkZwdvbG97e3qLVi4iIiPTLGz/InYiIiEhsDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRMWARERERiYwBi4iIiEhkDFhEREREImPAIiIiIhIZAxYRERGRyBiwiIiIiETGgEVEREQkMgYsIiIiIpExYBERERGJjAGLiIiISGQMWEREREQiY8AiIiIiEhkDFhEREZHIGLCIiIiIRMaARURERCQyBiwiIiIikTFgEREREYmMAYuIiIhIZAxYRERERCJjwCIiIiISGQMWERERkcgYsIiIiIhExoBFREREJDIGLCIiIiKRGVV3BWqjhIQE7N27F+fPn0dSUhJMTEzg4OCAYcOGYfLkyTA1Na3uKhIREVEVYsAS2dmzZ7FkyRLk5OQIr+Xn5yMrKwuRkZHw8/PDtm3b0KJFi2qtJxEREVUddhGKKCoqCosWLUJOTg7MzMywaNEi7N+/H7t27cL48eMBALGxsZg1a5ZKACMiIqLahS1YIvr2229RUFAAIyMj7Ny5E507dxbW9e7dGy1atMCGDRsQGxuL33//HQsWLKjW+hIREVHVYAuWSG7fvo3r168DAMaOHasSrhRmzJiBVq1aAQD27NmDoqKi115PIiIiqnoMWCIJCgoSlseOHat2GwMDA4wePRoA8OLFC1y9evW11Y+IiIheHwYskdy4cQMAYGZmho4dO5a7Xffu3YXlsLCw11I3IiIier0YsEQSHR0NAGjevDmMjMof2ubk5FRmHyIiIqpdGLBE8PLlS2RkZAAAmjRponFbS0tLmJmZAQCSkpJeS/2IiIjo9eJdhCLIzc0VlhXhSRNTU1Pk5eUhLy9P62PI5XIAQGFhIQwNDV+xpuUzNDRE03qWMIRE9LKpfA1N60EqlULSoAkMJOL/v1L5JFaNIZVKIZVKq7sqr4TXbPXgNVs9qvp6VZSr+FsrBgYsEbx8+VJYNjY2rnB7ExMTAEBBQYHWx5DJZEDJXFtVpS3M0LZuxQGRRJQDhIeHAw1a/98/er3Cw6u7BjrhNVsNeM1Wn9dwvSr+1oqBAUsEderUEZa1mXqhsLAQAFC3bl2tj2FkZAQXFxcYGBhAIuE3ViIiIrHI5XLIZDKNY6griwFLBPXq1ROWten2y8/PB7TsTlQwMDAQWr6IiIioZuMgdxHUqVMHVlZWgBYD17OysoQQVtGAeCIiItJPDFgiad36//rinz59iuLi4nK3i4mJEZYVs7oTERFR7cKAJZKuXbsCJV2Ed+7cKXe7a9euCctdunR5LXUjIiKi14sBSyQDBw4Ulg8dOqR2G5lMhiNHjgAA6tevj549e762+hEREdHrw4AlEldXV3Tr1g0oC
Vg3b94ss83OnTuF2dunTZum1ZQOREREpH8kcjFn1XrDRUVFYdKkSSgoKICZmRnmzJmDnj17oqCgACdPnsSBAwcAAC1btsShQ4dgbm5e3VUmIiKiKsCAJbKzZ89iyZIlyMnJUbu+ZcuW2LZtG1q0aPHa6yaGdu3aAQDmz5+PBQsWVEsdPDw8kJCQAE9PT6xfv15lXXx8PAYMGAAA+O677zBmzJhqqSMREb3ZOA+WyDw8PHDs2DHs2bMH58+fR3JyMoyNjdG8eXMMHToUU6ZMgampaZXX4+rVq5g2bVqZ1w0NDWFubg5zc3M0bdoUHTt2RNeuXdG/f/83Yp6t8n4vCmZmZrCxsYGrqyvGjBmD3r17l7utcpjT1oABA7BlyxaV1zZt2oTNmzeX2VYikcDMzAyNGzeGi4sLRo0ahXfeeUen46tz//59rbctr64omautXr16cHBwQI8ePTBhwgSVh5uT/uB1UhavE6osBqwqYGdnh2XLlmHZsmXVXZUypFIpsrKykJWVhYSEBFy/fh27d++GtbU1pk6dilmzZok6k622pk6ditDQUPTo0QN79+597cdXyMvLQ2xsLGJjY3Hs2DGMHj0a69atq5LnP1ZELpcjNzcXubm5iI2NRUBAAAYPHoyffvqpRoZhmUyG7OxsREVFISoqCj4+Pli2bBkmT55c3VUjkfE6eXW8Tt4cDFhvgEmTJsHb21v4OS8vD1lZWbh//z6uXLmCf/75B+np6fjf//6Hc+fOYevWrbC2tlZbVmW+xVWVs2fPilJO6d+LXC5HVlYWwsPDsWvXLqSlpeHIkSNo0qQJFi1apLGsAQMGYOHChRUes6Jxd+vWrYOLiwtQ8kGclJSEmzdvYteuXSgoKMBff/2F7777DqtWrYKtrS0CAgLKLWvEiBEAgE6dOuG7776rsG6VpVxXRX1TUlJw4cIF7N+/H0VFRVi9ejUcHR3Rp08f0Y9PrwevE93wOnlzMWC9ARo2bIi2bduWed3d3R2zZs3Co0ePsGTJEkRFReH27duYN28edu/eXSO//YmpvN9Ljx494OHhgTFjxuDly5fYu3cv5s2bp/H3Ub9+fbVlVZa9vb1KOe3bt8e7776LIUOGwMvLC8XFxfD19cXcuXPRuHFjrY5pZmYmSt0qqquivv369UOHDh3w5ZdfAgB27NjBPxx6jNeJuHUFr5M3BqdpILRu3Rr79u2Ds7MzACAsLAx//vlndVerWrVu3RrvvvsuACA3N1dlBv7q4OzsjPfeew8AUFxcjNDQ0GqtT0XGjh2LBg0aAAAiIiKquzpURXid6IbXSe3GFiwCANStWxc//PADRowYAblcjh07dmDy5Mll5uqq6C7CFy9ewMfHB+fPn0dMTAzy8vJgYWEBa2trODo6om/fvhg8eDAaNWoEAPjiiy/g7+8v7B8aGiocQ8HOzk6lW1DTXYRisrOzE5YLCwur7DjaUv4WnJiYWK110YadnR0yMjI0/u7Cw8Nx7tw5hIWFISYmBllZWTAxMUGTJk3QvXt3TJ06VXgMVXkeP36MP/74A1evXkVCQgKKiopgZWWFhg0bwtnZGe+88w4GDhxYbsvK8+fP8ccff+DixYuIj49HXl4eGjZsCDc3N0yYMIGtChXgdaIbXie1FwMWCdq0aYO+ffvi0qVLSElJQURERKUe5xMdHY0PPvgAKSkpKq9nZGQgIyMD0dHRCAoKgkwmw5QpU6rgDMT17NkzYblZs2bVWhcAKmG3Om5EqCzF769p06Zq1x8+fFjtjSBFRUWIjo5GdHQ0/Pz8sHz58nIHAJ86dQpLlixBUVGRyuvPnz/H8+fPce/ePRw+fBgBAQFqu3+OHTuGVatWCQ9gV0hKSsLp06dx+vRpjBs3Dt98841e/M6rA68T3fA6qb34myAVvXv3xqVLlwAA169fr1TAWrJkCVJSUmBsbAwvLy/069cPjRo1glwuR1JSEsLDwxEUFKSyz6JFizBjxgwsW7YMkZGRageaV
seM99HR0Th//jwAwM3NTWhxq07K3S/29vbVWpeK+Pv7Iz09HSgZ2KyOVCqFpaUlBgwYgG7duqFFixYwMzNDSkoK7ty5g7179yIjIwNr1qyBk5NTmakAUlNT8eWXX6KoqAgNGzbE5MmT4ebmhgYNGqCgoABPnz5FaGgogoOD1R7/5MmTWLp0KeRyORwcHDBlyhS0atUK1tbWSEhIwMGDBxESEoKDBw/C3Ny8Rt4VXN14neiG10ntxoBFKjp27Cgsx8bGar1fXFyc8JDrL774okwLlaurKwYPHowlS5bgxYsXwuu2trawtbWFmZkZUIUDTdVJS0vDgwcPhJ/lcjmys7Nx8+ZN7N69GwUFBbCwsNDqA+PFixcqZZXH3t5eONfKSExMFO6Eql+/vsZ5h16X+Ph4YfwISu6OSk1NxYULF4QxfG3atMGMGTPU7t+vXz+8//77ZeaFc3Z2xrvvvotp06Zh8uTJuH//PjZt2lTmnM+fPy98o961a1eZ902XLl0wevRoFBQUlDl2eno6Vq5cCblcjrFjx2L16tUq37w7duyIwYMHY+PGjfj111+xZ8+eN3a+Il4nuuF18uZiwCIVVlZWwrJyEKrI8+fPhWXFMxnVkUgksLS01KGG4tm3bx/27dundp2BgQEmTpyIDz74AI6OjhWWFRwcXO43QGV79uzR+iHfitvPb9y4gf/85z/Ch+S//vUv1KtXT6syqpLi7id1LCwsMH/+fHh7e6N+/fpqt7G1tdVYvoWFBT799FPMmzcPN27cQEZGhsofqtTUVACApaWlxlBet27dMq/t27cP2dnZsLW1xddff11ut8aCBQvg7++P5ORkHD16tMJpCGojXie64XXy5mLAIhXK3xpzc3O13q9x48bCsr+/v943E8tkMpw8eRJ16tTB559//tqmrPj/2rv3oKir9w/gb5aAQBCUi1Y2UooogqIBijcCERS5iZhahozpmFlmpRnNqH3HccoYUwflYpdZNBVESAYpEQxBuSmOBSgKVOoSXhBYBES5yO8f+Px2gV1AFrDl/fpr2T2fswf0wLOf85znKKuebWZmhg0bNmDJkiX9MpbeqKmpQUxMDIyMjLBs2bJuXfPo0SNUVlaivr4ebSd4yS4PX79+Xe7Tedv/uerqaqSkpMDV1bXb42vbNPHmm28q/bd94YUXYGtri6SkpE4PcB/sOE96h/NEvTHAIjmyQVVPDqN+9dVXYWdnh9zcXIjFYly4cAFubm5wcHCAra1tvxwP1FOd7YR8/Pgxbt26hfj4eERGRiIyMhIFBQX48ccflX4Pfb2jEa1LBd7e3n36Hj3R/i5DS0sLamtrcf36dURGRiI5ORnbt2/HP//8ozDgrqyshFgsRlJSEm7dugVlR6NWVVXJfe3i4oKhQ4fi4cOH+PDDD4W6THZ2dpgwYYLCquLNzc24fv06ACA6Olo4hL0rbXcCBhvOk97hPBm8GGCRHNnJ2dOlvO+++w4ff/wxrly5gpKSEpSUlCA0NBRaWlqYPHkyPD094efnBx0dnT4YuWq8+OKLsLS0xOeffw5zc3Ns3boVly9fRnh4eL/c9pat+tyWgBodHY2LFy/ixIkTePDgAcLDw6GhodHnY+kpDQ0NGBgYwN7eHvb29vjss89w6tQpiMViODk5ddjGXVBQgPfeew9SqbRb/T958kTu62HDhiEsLAyffvop7t27h5ycHOTk5ACtHw4cHR2xePFiODs7y11XXV2NpqamHn9/neWoDFacJ8+O82TwYIBFcq5duyY87k5OhawRI0YgKioKWVlZOHPmDC5duoSSkhI0NjYiNzcXubm5+Omnn3Dw4MEe9z0Q/P39sXv3bkilUsTGxvbLH472VZ8nTZqEhQsX4ssvv0RcXBzOnTuHyMhIBAYG9vlYemv16tU4deoUACA2NlbuD0dDQwM2btwIqVQKLS0trFixAnPnzoW5uTkMDQ2F5QiJRCIsaXT2qd3Ozg7JyclISkpCWloacnNzc
ffuXdTW1iI5ORnJycmYNWsW9u/fL9xZaW5uFq5fsmSJ0uUmWQOxm/W/gPOkdzhP1BcDLJKTmZkpPH7jjTeeqQ9HR0chB6CqqgpZWVmIjo5GdnY2bt++jU8++QQnT55U2Zj7ikgkwujRoyGVSlFeXt4hebS/aGhoYNu2bcjOzkZZWRn2798PX19fuQ0JzyPZnUTtd45lZ2dDIpEAALZv364wX6Y7n9p1dHTg7e0tLAtJJBKkpaXh8OHDuHnzJi5cuIA9e/YIycayd2ZbWlr6bdequuI86R3OE/XFo3JIUFRUhKysLKC16J21tXWv+xw2bBg8PDwQGRkJFxcXAEBhYWGPSkAMJNlb5LKf6Pqbrq4uPvjgA6A1MfaHH34YsLF0l+zPrv1SQ0lJifB4wYIFCvsoKCjo8fu21eqJjY3FyJEjgdZCi220tbVhYWEBtB4LRb3HefLsOE/UFwMsAlrXzbds2SLcXl61apXKK/LK7mxpn4jZlpf1PBy10aa+vh5//fUX0JpzMhCfymX5+voKlbKPHj3a7ZyMgSL7S799lWrZPyT19fWdXv/06VPExMQ88/vr6+sLeTqdJf6itSjl+fPnn/k9iPOktzhP1BcDLEJJSQnefvttIf/KwcEBy5cv71EfhYWFKCwsVPh6S0uLsPyooaEhd34ZZLYSSyQSpTtk+lNISIiQsDlr1iyFu236i5aWFlavXg207vY8dOjQgI5HmYaGBuzbt0/42snJSe51c3Nz4bHsWZSydu/eLRSv7cz58+c7HMskq6amBnl5eUAnFb0DAgKEkiRBQUEoLi5W+v2cO3dO2FFF8jhPnh3niXpjDtYg0L4Sc319Paqrq3Hjxg1kZ2cjIyNDCGpsbW2xb9++HicqFhYWIigoCDY2NnB2dsbEiRNhYmKCpqYmlJaWIi4uDhkZGUDrpyIzMzO566dOnYq4uDhUVFTg66+/hre3NwwMDIDWGivtAzJVaP9zQesOnFu3buHkyZPCJzYdHR1s2LBBaV/drVCtqamJMWPGPPOY/f39ERYWJhy8umrVqh6V01Cl9hWqAaC2thaFhYU4duyY8Mt49OjRWLp0qVy7WbNmwdjYGBUVFdi7dy9KS0sxb948DBs2DLdv38bx48eRlZWFqVOnKlyeSExMxLp16zBjxgzMnDkT48aNg6GhIerq6lBUVIQjR47g3r17ANChxpCJiQl27dqFDRs2oLy8HIsXL8aiRYswZ84cjBw5Ek1NTbh79y7y8vKQlJQEiUSC8PBwjB8/XsU/xecf50nvcJ4MXgywBgFllZjbDB8+HCtXrsTq1at7tTSYn5+P/Px8ha9PmTIFO3fu7PC8h4cHIiIiIJFIhLo6bV555RWh4J0qdffnEhwcDEtLS6Xtuluh2sDAALm5uT0eaxsdHR0EBgYiODgY1dXVOHLkCNauXfvM/fWGsgrVbcaPH48DBw50qBKtp6eHXbt2Yf369Xjy5EmndXYcHBywbds2eHp6Kuy/sbERaWlpSEtLU9hm2bJlne6AcnNzQ2hoKIKCgiCVShEVFYWoqKhO+xCJRM9lLbf+wHnSO5wngxcDrEFGJBJhyJAhMDAwwMsvv4yJEyfCzs6uy0q9XfH09ISxsTEyMzORn5+Pe/fuoaKiAk1NTTA2NoaVlRU8PDywcOFCiEQdV6aHDBmCqKgoREREICMjA2VlZQpzDvqSlpYWjIyMMHbsWDg5OcHPz++5OdqnzfLly/H9999DKpVCLBYjICDgufmlpquri+HDh2PixIlwd3fH/PnzFQbss2fPRmxsLA4ePIjs7GxUVVXBwMAAY8eOhZeXF/z9/VFWVqbwvYKCgjBjxgxkZ2fjxo0bKC8vR2VlJTQ1NTFy5EhMmTIF/v7+So9ucnFxwdmzZ3H8+HGkpaWhpKQE1dXV0NTUhImJCSwsLDB9+nS4u7t3yI8ZzDhPeofzZHDQaHleEl6IiIiI1AST3ImIiIhUjAEWERERkYoxwCIiIiJSMQZYR
ERERCrGAIuIiIhIxRhgEREREakYAywiIiIiFWOARURERKRiDLCIiIiIVIwBFhEREZGKMcAiIiIiUjEGWEREAywkJASWlpawtLTss/dwcXGBpaUlvvjiiz57DyL6f50f301E9B+Qk5ODgIAA4Ws9PT1kZmZCV1dX6XWPHz/GzJkzUVtbKzx36NAhTJs2rU/HS0SDB+9gEZHaePToEVJSUrpsd/bsWbngiohI1RhgEZFa0NHRAQDEx8d32batTds1RESqxgCLiNSCi4sLACAzMxPl5eUK21VUVCAjIwMAMHfu3H4bHxENLgywiEgtzJw5E6ampmhubkZiYqLCdqdOnUJTUxNMTU0xY8aMfh0jEQ0eTHInIrWgqamJhQsXQiwWIz4+HoGBgZ22a1se9PT0hKamZpf9NjQ0ICYmBqdPn0ZxcTFqa2thaGgIKysreHp6wsvLCyKR8s+qd+/eRUREBNLT03H//n0YGhrC2toaAQEBPQryampqcPToUaSmpuLmzZuora2FkZERrK2t4evrC3d3d2hoaHS7PyLqOwywiEht+Pj4QCwW49q1ayguLoaFhYXc6yUlJbh69arQtrCwUGl/paWlWLNmDf7++2+55x88eID09HSkp6cjOjoaoaGhMDIy6rSP3NxcrF27Vi6pvry8HKmpqUhNTcVHH33Ure8tKysLGzduhFQqlXteti8nJyfs2bMHQ4YM6VafRNR3uERIRGrDyspKCKo6S3Zve27cuHGYMGGC0r7q6uoQGBgoBFeurq4ICwtDbGws9u3bBwcHBwDA5cuX8f7776O5ublDH2VlZUJwJRKJsGzZMojFYpw4cQI7d+6Eubk5QkJCcO7cOaVjuXz5MtasWQOpVAoTExNs3LgR4eHhiIuLQ3h4OLy9vQEAaWlprHNF9JxggEVEasXHxwdozbVqaWkRnm9paUFCQoJcG2X2798PiUQCAFi3bh0OHDgAFxcXWFtbY/78+Th06BC8vLwAAFeuXEF0dHSHPr755hvhzlVwcDD+97//wdHRETY2NvD390dsbCzGjx+PgoICheNobGzE5s2b0djYiNmzZyMlJQXr1q2Ds7MzJk6cCGdnZwQHB2PHjh0AgDNnzghJ/EQ0cBhgEZFa8fb2hkgkwp07d5CTkyM8n5OTgzt37kAkEgmBkSINDQ04ceIEAMDCwqLTZTwNDQ189dVXwtLgkSNH5F4vLy8XanI5OzvD09OzQx/6+vpCYKRIYmIi/v33X+jo6ODbb79VWET1rbfewqRJkwAAcXFxSvskor7HAIuI1MqIESOEiuyyy4Rtj6dPn44RI0Yo7aOgoAAPHz4EACxatEhhMry+vj4WLFgAtOZ33b9/X3gtJydHWDb08/NT+F6TJk3qkCsm6/fffwcA2NvbY/jw4UrHbWdnBwD4448/lLYjor7HJHciUju+vr7IysrCmTNnsH37dgBAUlIS0M3lweLiYuHx5MmTlbadPHkyjh07JlxnZmYGACgqKhLa2NjYKO3DxsZG7j1ltS0fXrhwodtnFT548KBb7Yio7/AOFhGpnXnz5kFXVxe1tbU4e/YsUlJSUFdXBz09Pbi5uXV5fXV1tfC4q7tGJiYmnV4nu9vP2Ni42320V1lZ2eV423v8+HGPryEi1eIdLCJSO0OGDIGrqysSEhIQHx8vJLu7urpCT0+vR30NdF2ptmXGOXPmYPPmzQM6FiLqPgZYRKSWfH19kZCQILejztfXt1vXGhoaCo8rKirw2muvKWwruxwne137Pl566aVu9dGekZER7t+/j8bGRowbN65b4yeigcclQiJSS46OjjA1NUVTUxOamppgZmYGR0fHbl0rm3T+559/Km2bl5fX6XWywVB+fr7SPpSVabCyshLaNDQ0dDFyInpeMMAiIrWkqakJHx8faGtrQ1tbGz4+Pl0eadPG2toaQ4cOBQCcPHkST58+7bRdbW0tfvvtNwDA2LFjhQR3AJg2bZqw+/CXX35R+F55eXlyC
fHttR1iXVNTw/ILRP8hDLCISG1t3rwZ+fn5yM/Px6ZNm7p9nba2Nvz9/YHW3YChoaEd2rS0tGDHjh2oqqoCALzzzjtyr5uZmWHu3LlAa6mFX3/9tUMfdXV1wi5HRRYtWiQsL+7atQuXLl1S2j43NxcXL17s8nskor7FHCwiok6sX78eycnJkEgkCAkJQVFREfz8/GBqaorS0lL8/PPPQiAzZcoULF26tEMfW7ZsQUZGBurq6rBp0yZcunQJ7u7u0NfXx40bN3Dw4EHcvHkT1tbWCpcJtbW1sXfvXrz77rt49OgRVq5cCQ8PD7i6umLUqFF4+vQpysvLcfXqVSQnJ6OoqAhbt24VjvIhooHBAIuIqBP6+voQi8XCYc9JSUlCLS1ZU6dORVhYWKfFSEeNGoWwsDCsW7cOdXV1OHr0KI4ePSrXZv369dDQ0FCah2Vra4vDhw9j48aNuHPnDhISEoRjfxSNnYgGFgMsIiIFRo0ahfj4eMTExOD06dMoKipCXV0dDA0NMWHCBHh5ecHLy0tpbte0adOQmJiIiIgIpKen4/79+zA0NIS1tTVWrFiB2bNnIyQkpMux2Nra4syZM4iLi0NqaiquXbuGqqoqiEQiDB8+HGPGjIG9vT3c3Nzw+uuvq/gnQUQ9pdEiexoqEREREfUak9yJiIiIVIwBFhEREZGKMcAiIiIiUjEGWEREREQqxgCLiIiISMUYYBERERGpGAMsIiIiIhVjgEVERESkYgywiIiIiFSMARYRERGRijHAIiIiIlIxBlhEREREKsYAi4iIiEjFGGARERERqRgDLCIiIiIV+z9YYoPChakdaQAAAABJRU5ErkJggg==", + "text/plain": [ + "<Figure size 600x800 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "riddles_ltr_df = pd.DataFrame({\n", + " 'model': ['DistilBERT Base', 'DistilBERT Base', 'BERT Base', 'BERT Base'],\n", + " 'order': [0, 0, 1, 1],\n", + " 'direction': [\"LTR\", \"RTL\", \"LTR\", \"RTL\"],\n", + " 'ppl': [290, 530, 620, 690],\n", + "})\n", + "\n", + "riddles_ltr_df_sorted_pairs = riddles_ltr_df.sort_values(by=['order', 'direction'])\n", + "\n", + "# Plot configuration\n", + "plt.figure(figsize=(6, 8))\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "# Create bar plot with LTR and RTL next to each other, no error bars (ci=None)\n", + "sns.barplot(x='model', y='ppl', hue='direction', data=riddles_ltr_df_sorted_pairs, dodge=True, palette=\"Set2\", ci=None)\n", + "\n", + "# Adjustments to the plot\n", + "# plt.xticks(rotation=45)\n", + "plt.title(\"Perplexity vs Model Size, From MLM\", fontsize=20)\n", + "plt.xlabel(\"Model\", fontsize=20)\n", + "plt.ylabel(\"Test Perplexity\", fontsize=20)\n", + "plt.legend(title=\"\", fontsize=20)\n", + "plt.tick_params(axis='both', 
labelsize=20)\n", + "\n", + "# Display the updated plot\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "execution_state": "idle", + "id": "8e5325e7-85ed-4cda-b24a-9f3248dec10b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_480557/2459623878.py:14: FutureWarning: \n", + "\n", + "The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.\n", + "\n", + " sns.barplot(x='model', y='ppl', hue='direction', data=transfer_wikitext_df.sort_values(by=['order', 'direction']), dodge=True, palette=\"Set2\", ci=None)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 600x800 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "transfer_wikitext_df = pd.DataFrame({\n", + " 'model': ['67M', '67M', '110M', '110M', '335M', '335M'],\n", + " 'order': [0, 0, 1, 1, 2, 2],\n", + " 'direction': [\"LTR\", \"RTL\", \"LTR\", \"RTL\", \"LTR\", \"RTL\"],\n", + " 'ppl': [24.4, 24.4, 21.8, 21.9, 17.7, 18.1],\n", + "})\n", + "\n", + "\n", + "# Plot configuration\n", + "plt.figure(figsize=(6, 8))\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "# Create bar plot with LTR and RTL next to each other, no error bars (ci=None)\n", + "sns.barplot(x='model', y='ppl', hue='direction', data=transfer_wikitext_df.sort_values(by=['order', 'direction']), dodge=True, palette=\"Set2\", ci=None)\n", + "\n", + "# Adjustments to the plot\n", + "# plt.xticks(rotation=45)\n", + "plt.title(\"Perplexity vs Model Size, From Scratch\", fontsize=20)\n", + "plt.xlabel(\"Model Size\", fontsize=20)\n", + "plt.ylabel(\"Test Perplexity\", fontsize=20)\n", + "plt.ylim(0.0, 122.75062123923252)\n", + "plt.legend(title=\"\", fontsize=20)\n", + "plt.tick_params(axis='both', labelsize=20)\n", + "\n", + "# Display the updated plot\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "9bc44c20-d2a8-431a-97cc-a43655e1f856", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/riddles.txt b/data/riddles.txt new file mode 100644 index 0000000..43787f3 --- /dev/null +++ b/data/riddles.txt @@ -0,0 +1,40 @@ +The more you take, the more you leave behind. What am I? Footsteps. +The more you have of me, the less you see. What am I? Darkness. +I am not alive, but I grow. I don’t have lungs, but I need air. I don’t have a mouth, but water kills me. What am I? Fire. +I get wetter the more I dry. What am I? A towel. +I become shorter the longer I live. What am I? A candle. +I am light as a feather, yet the strongest man can’t hold me for much longer than a minute. What am I? Breath. +I am invisible, but you can feel me. I am intangible, but you can hear me. What am I? The wind. +I have keys but open no locks. What am I? A piano. +The more of me there is, the less you see. What am I? Fog. +I am always in front of you, but you can never see me. What am I? The future. +I don’t have wings, but I can fly. I don’t have eyes, but I can cry. Wherever I go, darkness follows me. What am I? A cloud. +I am not alive, but I can die. What am I? A battery. +I can be stolen, but I can’t be touched. What am I? A heart. +I am something that comes once in a minute, twice in a moment, but never in a thousand years. What am I? The letter "M." +I can be cracked, but never broken. What am I? A promise. +I am always with you, but I can’t be seen. What am I? Your shadow. +I’m small but I can cover miles. 
What am I? A footprint. +The more you have of me, the less you know. What am I? A secret. +I’m something that can fill a room, but I don’t take up space. What am I? Light. +I am always with you, but you never see me. I can be forgotten, but I never leave. What am I? Your name. +The more of me there is, the less you hear. What am I? Silence. +I can travel around the world while staying in the corner. What am I? A stamp. +I am something that can be cracked, but I can’t be touched. What am I? A code. +I am something you can hear, but not touch. I can be loud or soft, but I can never be seen. What am I? Sound. +I am something that you can never keep, no matter how hard you try. What am I? The present moment. +I am not alive, but I grow. I don’t have a mouth, but I can speak. What am I? A rumor. +I am always running, but I never move. What am I? A clock. +I get bigger the more you take away. What am I? A hole. +I can’t be seen, but I can be felt. I have no color, but I make things clear. What am I? Understanding. +I get smaller the more you use me. What am I? A pencil. +I can be broken without being touched. What am I? A promise. +I am something that everyone has, but no one can keep forever. What am I? Time. +I can be light as a feather, but even the strongest hands cannot hold me. What am I? A thought. +The more you take from me, the greater I become. What am I? A debt. +I am often in front of you, but I’m never within reach. What am I? The horizon. +You can see me every day, but I will never be seen the same way twice. What am I? The sky. +I am not alive, but I grow over time. What am I? A reputation. +I can’t be touched, but I can touch everything. What am I? A feeling. +I never speak, but I can communicate. What am I? A look. +I can be hard, but I am not solid. What am I? A deadline.
\ No newline at end of file diff --git a/data/wandb_export_2024-12-04T19_56_43.325-05_00.csv b/data/wandb_export_2024-12-04T19_56_43.325-05_00.csv new file mode 100644 index 0000000..0793204 --- /dev/null +++ b/data/wandb_export_2024-12-04T19_56_43.325-05_00.csv @@ -0,0 +1,21 @@ +"Name","val_loss" +"distilbert_base_japan_rtl","2.8326140656842465" +"distilbert_base_japan_ltr","2.8237654270093375" +"bert_6M_rtl_scratch","4.744475745069383" +"bert_6_ltr_scratch","4.761364663504469" +"bert_11_rtl_scratch","4.446949723712903" +"bert_11_ltr_scratch","4.462378635840655" +"bert_19_rtl_scratch","4.177320378220149" +"bert_19_ltr_scratch","4.186270630920852" +"bert_35_rtl_scratch","3.927856646112007" +"bert_35_ltr_scratch","3.941595227497572" +"qa_distilbert_base_ltr_v2","3.1502674087524416" +"qa_distilbert_base_rtl_v2","3.1904524799346925" +"qa_ltr_distilbert_base","3.3259500965491715" +"distilbert_base_ltr_scratch","3.6863074678864063" +"distilbert_base_rtl_scratch","3.6885659350549624" +"deep-monkey-11","3.009245432539425" +"distilbert_base_ltr_4epoch","3.1961001348322804" +"distilbert_base_rtl_4epoch","3.19366226070481" +"bert_base_ltr_4epoch","3.082235844222857" +"bert_base_rtl_4epoch","3.0881099989546192"
\ No newline at end of file diff --git a/finetune_QA.py b/finetune_QA.py new file mode 100644 index 0000000..e5b8ef7 --- /dev/null +++ b/finetune_QA.py @@ -0,0 +1,304 @@ +""" +accelerate launch --mixed_precision bf16 finetune_QA.py \ +--model_direction rtl \ +--checkpoint_path /home/sipb/nlp-class-project/checkpoints/distilbert_base_rtl/epoch_3_checkpt \ +--tokenizer_name distilbert/distilbert-base-uncased \ +--warmup_steps 100 \ +--learning_rate 1e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/qa_distilbert_base_rtl/ \ +--eval_steps 38 \ +--block_size 128 \ +--num_train_epochs 50 \ +--weight_decay 1e-4 + + +accelerate launch --mixed_precision bf16 finetune_QA.py \ +--model_direction ltr \ +--checkpoint_path /home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr/epoch_3_checkpt \ +--tokenizer_name distilbert/distilbert-base-uncased \ +--warmup_steps 100 \ +--learning_rate 1e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/qa_distilbert_base_ltr/ \ +--eval_steps 38 \ +--block_size 128 \ +--num_train_epochs 50 \ +--weight_decay 1e-4 + +accelerate launch --mixed_precision bf16 finetune_QA.py \ +--model_direction ltr \ +--checkpoint_path /home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr/epoch_3_checkpt \ +--tokenizer_name distilbert/distilbert-base-uncased \ +--warmup_steps 100 \ +--learning_rate 1e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/qa_distilbert_base_ltr_overfit/ \ +--eval_steps 50 \ +--block_size 128 \ +--num_train_epochs 1000 \ +--weight_decay 0 +""" + + + +import argparse +import math +import os +from collections import defaultdict + +import accelerate +import torch +import transformers +import wandb +from datasets import load_dataset +from torch.utils.data import Dataset, DataLoader +from transformers.data.data_collator import default_data_collator +from tqdm.auto 
class DatasetAQ(Dataset):
    """Torch dataset yielding padded question/answer token sequences.

    Each element of ``qa_pairs`` is a ``(question, answer)`` pair of 1-D token
    id tensors. For ``text_direction == "rtl"`` (case-insensitive) the tokens
    are laid out as ``[question, answer]`` and padded on the left; for any
    other direction they are laid out as ``[answer, question]`` and padded on
    the right. Every item has length ``tokenizer.model_max_length``.
    """

    def __init__(self, qa_pairs, text_direction, tokenizer):
        """Store the pairs, the text direction and the tokenizer.

        Only ``tokenizer.model_max_length`` and ``tokenizer.pad_token_id`` are
        read from the tokenizer.
        """
        self.qa_pairs = qa_pairs
        self.text_direction = text_direction
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        """Return ``input_ids``, ``labels`` and ``attention_mask`` for pair ``idx``.

        Pad positions hold ``pad_token_id`` in ``input_ids``, ``-100`` in
        ``labels`` and ``False`` in ``attention_mask``.

        Raises:
            AssertionError: if the concatenated pair is longer than
                ``tokenizer.model_max_length``.
        """
        question, answer = self.qa_pairs[idx]
        is_rtl = self.text_direction.lower() == "rtl"
        sentence = torch.cat([question, answer] if is_rtl else [answer, question], dim=0)

        # TODO: length
        max_length = self.tokenizer.model_max_length
        num_to_pad = max_length - sentence.size(0)
        assert num_to_pad >= 0, (sentence.size(), max_length)

        # Build the padding unconditionally: a zero-length pad is a valid
        # tensor, so a sequence that exactly fills the block no longer leaves
        # the outputs undefined.
        pad_tokens = torch.full((num_to_pad,), self.tokenizer.pad_token_id, dtype=sentence.dtype)
        # -100 labels on padding; presumably skipped by the loss -- confirm
        # against the loss function used by the caller.
        pad_labels = torch.full((num_to_pad,), -100, dtype=sentence.dtype)
        real_mask = torch.ones_like(sentence, dtype=torch.bool)
        pad_mask = torch.zeros_like(pad_tokens, dtype=torch.bool)

        if is_rtl:
            # Padding goes on the left.
            input_ids = torch.cat([pad_tokens, sentence], dim=0)
            labels = torch.cat([pad_labels, sentence], dim=0)
            attention_mask = torch.cat([pad_mask, real_mask], dim=0)
        else:
            # Padding goes on the right. The mask is concatenated rather than
            # sliced because `mask[-0:] = 0` would zero the whole mask.
            input_ids = torch.cat([sentence, pad_tokens], dim=0)
            labels = torch.cat([sentence, pad_labels], dim=0)
            attention_mask = torch.cat([real_mask, pad_mask], dim=0)

        return {
            "input_ids": input_ids,
            "labels": labels,
            "attention_mask": attention_mask,
        }

    def __len__(self):
        """Return the number of question/answer pairs."""
        return len(self.qa_pairs)
def parse_args():
    """
    Parse the command-line arguments of the QA fine-tuning script.

    Re-using HuggingFace arguments when possible (most of the help strings are directly copied).
    https://github.com/huggingface/transformers/blob/7bbc62474391aff64f63fcc064c975752d1fa4de/examples/pytorch/language-modeling/run_clm.py#L75

    Returns:
        argparse.Namespace with the parsed options. ``--model_direction``,
        ``--output_dir`` and ``--learning_rate`` are required.
    """
    parser = argparse.ArgumentParser()

    # Model
    parser.add_argument("--model_direction", type=str, required=True, choices=["ltr", "rtl"],
                        help="Whether to train a left-to-right or right-to-left LM.")
    parser.add_argument("--checkpoint_path", type=str,
                        help="Path to load model weights from.")

    # Data
    parser.add_argument("--tokenizer_name", type=str,
                        help="Name of tokenizer to load.")
    parser.add_argument("--dataset_name", type=str, default="truthfulqa/truthful_qa",
                        help="The name of the dataset to use (via the datasets library).")
    parser.add_argument("--dataset_config_name", type=str, default="generation",
                        help="The configuration name of the dataset to use (via the datasets library).")
    # TODO: block_size, train on shorter seqs?
    parser.add_argument(
        "--block_size",
        type=int,
        help="Optional input sequence length after tokenization. "
             "The training dataset will be truncated in block of this size for training. "
             "Default to the model max input length for single sentence inputs (take into account special tokens)."
    )

    # Training
    parser.add_argument("--train_from_scratch", action="store_true")
    parser.add_argument("--output_dir", type=str, required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
    parser.add_argument("--per_device_train_batch_size", type=int, default=8)
    parser.add_argument("--per_device_eval_batch_size", type=int, default=16)
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
    parser.add_argument("--num_train_epochs", type=int, default=1)
    parser.add_argument("--learning_rate", type=float, required=True)
    parser.add_argument("--warmup_steps", type=int, default=0)
    parser.add_argument("--scheduler", type=str, default="cosine")
    parser.add_argument("--weight_decay", type=float, default=0.0)
    parser.add_argument("--logging_steps", type=int, default=1,
                        help="Number of update steps between two logs.")
    # The help text used to be a copy of the --logging_steps one.
    parser.add_argument("--eval_steps", type=int, default=20000,
                        help="Number of update steps between two evaluations.")
    parser.add_argument("--dataloader_num_workers", type=int, default=8)

    args = parser.parse_args()
    return args
def main():
    """Fine-tune a directional masked-LM checkpoint on question/answer pairs.

    Parses the CLI arguments, loads the checkpoint and tokenizer, splits the
    dataset's ``validation`` split 80/10/10 into train/val/test, trains with
    the causal loss for ``--model_direction``, evaluates every
    ``--eval_steps`` steps while keeping the best weights, and finally logs
    the test loss of the best model.
    """
    args = parse_args()
    transformers.set_seed(42)

    accelerator = accelerate.Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps, log_with="wandb", project_dir=args.output_dir)
    # Will `add_attn_hooks` to `model` later

    # Load model weights in both cases, but re-initialize if training from scratch
    model = transformers.AutoModelForMaskedLM.from_pretrained(args.checkpoint_path, attn_implementation="sdpa", ignore_mismatched_sizes=True)
    if args.train_from_scratch:
        model.apply(model._init_weights)
        model.tie_weights()  # probably not applicable

    tokenizer = transformers.AutoTokenizer.from_pretrained(args.tokenizer_name)

    # Data
    raw_datasets = load_dataset(args.dataset_name, args.dataset_config_name)
    block_size = args.block_size if args.block_size is not None else model.config.max_position_embeddings
    model.config.max_position_embeddings = block_size
    tokenizer.model_max_length = block_size

    # QA-specific code
    all_data = raw_datasets["validation"]
    transformers.set_seed(42)
    train_val_split = all_data.train_test_split(test_size=0.2, shuffle=True)
    val_test_split = train_val_split['test'].train_test_split(test_size=0.5, shuffle=False)
    # Explicit name -> split mapping. The previous zip() paired "test" with the
    # train split and "train" with the test split, so training ran on 10% of
    # the data. NOTE(review): numbers from earlier runs used the swapped splits.
    splits = {
        "train": train_val_split['train'],
        "val": val_test_split['train'],
        "test": val_test_split['test'],
    }

    qa_pairs = defaultdict(list)
    for data_name, dataset in splits.items():
        for row in dataset:
            tokenized_question = tokenizer("Question: "+ row["question"], return_tensors="pt")["input_ids"].squeeze(0)
            for ans_type in ["correct_answers", "incorrect_answers"]:
                for answer in row[ans_type]:
                    # the [1:] drops the first token of the answer (presumably CLS)
                    tokenized_answer = tokenizer(f"Answer: {answer}", return_tensors="pt")["input_ids"].squeeze(0)[1:]
                    qa_pairs[data_name].append((tokenized_question, tokenized_answer))

    train_dataset = DatasetAQ(qa_pairs["train"], args.model_direction, tokenizer)
    test_dataset = DatasetAQ(qa_pairs["test"], args.model_direction, tokenizer)
    val_dataset = DatasetAQ(qa_pairs["val"], args.model_direction, tokenizer)

    train_loader = DataLoader(train_dataset, batch_size=args.per_device_train_batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.per_device_eval_batch_size, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=args.per_device_eval_batch_size)

    model, train_loader, test_loader, val_loader = accelerator.prepare(model, train_loader, test_loader, val_loader)

    optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    lr_scheduler = transformers.get_scheduler(
        # Honour --scheduler (default "cosine", i.e. the previously hard-coded choice).
        name=args.scheduler,
        optimizer=optimizer,
        num_warmup_steps=args.warmup_steps * accelerator.num_processes,
        # num_training_steps=args.num_train_epochs * math.ceil(len(train_loader) / args.gradient_accumulation_steps),
        num_training_steps=args.num_train_epochs * len(train_loader),
    )

    lr_scheduler = accelerator.prepare(lr_scheduler)  # testing if this fixes learning rate

    loss_fn = causal_loss_wrapper(args.model_direction)

    add_attn_hooks(model, args.model_direction)
    model.train()
    optimizer.zero_grad()

    wandb.require("core")
    accelerator.init_trackers(
        project_name="NLP-Class-Project",
        config=vars(args) | {"model_parameters": sum(p.numel() for p in model.parameters())},
        init_kwargs={"wandb": {"entity": "frostbyte"}}
    )

    global_step = 0  # unaccumulated steps
    past_losses = []
    best_val_loss = float("inf")
    best_checkpt_path = os.path.join(args.output_dir, "best_checkpt")
    best_state = None  # CPU copy of the weights with the lowest validation loss

    for epoch in tqdm(range(args.num_train_epochs), position=0, leave=True, desc="Epoch"):
        for step, batch in enumerate(tqdm(train_loader, position=1, leave=False, desc="Train Iteration")):
            with accelerator.accumulate(model):
                labels = batch.pop("labels")
                outputs = model(**batch)
                loss = loss_fn(outputs.logits, labels)
                accelerator.backward(loss)

                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

            past_losses.append(loss.item())
            if (global_step + 1) % args.logging_steps == 0:
                avg_train_loss = torch.tensor(past_losses).mean().item()  # Assuming 1 GPU
                accelerator.log({
                    "train_loss": avg_train_loss,
                    "learning_rate": lr_scheduler.get_last_lr()[0],
                })
                past_losses.clear()

            if (global_step + 1) % args.eval_steps == 0:
                val_loss_sum = val_examples = 0
                model.eval()
                for val_batch in tqdm(val_loader, position=2, leave=False, desc="Val Iteration"):
                    labels = val_batch.pop("labels")
                    with torch.no_grad():
                        outputs = model(**val_batch)

                    loss = loss_fn(outputs.logits, labels)

                    batch_size = labels.size(0)
                    val_loss_sum += loss.item() * batch_size
                    val_examples += batch_size

                val_loss = val_loss_sum / val_examples
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    model.save_pretrained(best_checkpt_path)
                    # Keep an in-memory snapshot so the best weights can be
                    # restored into this very model object (hooks, device and
                    # wrappers stay untouched) before testing.
                    best_state = {
                        name: tensor.detach().cpu().clone()
                        for name, tensor in model.state_dict().items()
                    }

                accelerator.log({"val_loss": val_loss},
                                log_kwargs={"wandb": {"commit": False}})
                model.train()

            if ((step + 1) % args.gradient_accumulation_steps == 0) or step == (len(train_loader) - 1):
                global_step += 1

    # testing
    # `model.from_pretrained(...)` is a classmethod that returns a *new* model;
    # its result used to be discarded, so the test loss was computed on the
    # last weights. Restore the best snapshot instead (if any eval ran).
    if best_state is not None:
        model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        test_loss_sum = test_examples = 0
        for test_batch in tqdm(test_loader):
            labels = test_batch.pop("labels")
            outputs = model(**test_batch)

            loss = loss_fn(outputs.logits, labels)

            batch_size = labels.size(0)
            test_loss_sum += loss.item() * batch_size
            test_examples += batch_size

    accelerator.log({"test_loss": test_loss_sum / test_examples})
    # Close the trackers so the final metrics are flushed.
    accelerator.end_training()


if __name__ == "__main__":
    main()
def parse_args():
    """
    Parse the command-line arguments of the Japanese fine-tuning script.

    Re-using HuggingFace arguments when possible (most of the help strings are directly copied).
    https://github.com/huggingface/transformers/blob/7bbc62474391aff64f63fcc064c975752d1fa4de/examples/pytorch/language-modeling/run_clm.py#L75

    Returns:
        argparse.Namespace with the parsed options. ``--model_direction``,
        ``--model_name``, ``--output_dir`` and ``--learning_rate`` are required.
    """
    parser = argparse.ArgumentParser()

    # Model
    parser.add_argument("--model_direction", type=str, required=True, choices=["ltr", "rtl"],
                        help="Whether to train a left-to-right or right-to-left LM.")
    parser.add_argument("--model_config", type=str,
                        help="Path to model config json, from which to train_from_scratch.")
    # The second sentence used to be concatenated without a separating space.
    parser.add_argument("--model_name", type=str, required=True,
                        help="Name of tokenizer to load. "
                             "If model_config is not specified, will also load model architecture. "
                             "If not training from scratch, will also load model weights.")

    # Data
    parser.add_argument("--dataset_name", type=str, default="Salesforce/wikitext",
                        help="The name of the dataset to use (via the datasets library).")
    parser.add_argument("--dataset_config_name", type=str, default="wikitext-103-v1",
                        help="The configuration name of the dataset to use (via the datasets library).")
    # TODO: block_size, train on shorter seqs?
    parser.add_argument(
        "--block_size",
        type=int,
        help="Optional input sequence length after tokenization. "
             "The training dataset will be truncated in block of this size for training. "
             "Default to the model max input length for single sentence inputs (take into account special tokens)."
    )

    # Training
    parser.add_argument("--train_from_scratch", action="store_true")
    parser.add_argument("--output_dir", type=str, required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
    parser.add_argument("--per_device_train_batch_size", type=int, default=8)
    parser.add_argument("--per_device_eval_batch_size", type=int, default=16)
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
    parser.add_argument("--num_train_epochs", type=int, default=1)
    parser.add_argument("--learning_rate", type=float, required=True)
    parser.add_argument("--warmup_steps", type=int, default=0)
    parser.add_argument("--weight_decay", type=float, default=0.0)
    parser.add_argument("--logging_steps", type=int, default=1,
                        help="Number of update steps between two logs.")
    # The help text used to be a copy of the --logging_steps one.
    parser.add_argument("--eval_steps", type=int, default=20000,
                        help="Number of update steps between two evaluations.")
    parser.add_argument("--dataloader_num_workers", type=int, default=8)

    args = parser.parse_args()

    return args
+ set_seed(42) + + # Will `add_attn_hooks` to `model` later + if args.model_config is not None: + assert args.train_from_scratch, "Expected to train from scratch when model_config is specified." + config = transformers.AutoConfig.from_pretrained(args.model_config) + model = transformers.AutoModelForMaskedLM.from_config(config) + else: + # Load model weights in both cases, but re-initialize if training from scratch + model = transformers.AutoModelForMaskedLM.from_pretrained(args.model_name, attn_implementation="sdpa") + + if args.train_from_scratch: + model.apply(model._initialize_weights) + model.tie_weights() # probably not applicable + + tokenizer = transformers.AutoTokenizer.from_pretrained(args.model_name) + + # Data + raw_datasets = load_dataset(args.dataset_name) + block_size = args.block_size if args.block_size is not None else model.config.max_position_embeddings + model.config.max_position_embeddings = block_size + + processed_datasets = preprocess_datasets(raw_datasets, tokenizer, block_size) + for split, hf_dataset in processed_datasets.items(): + processed_datasets[split] = convert_to_torch_dataset(hf_dataset) + + train_val_split = processed_datasets["train"].train_test_split(test_size=0.2, shuffle=True) + train_indices = torch.randperm(len(train_val_split["train"]))[:int(0.4 * len(train_val_split["train"]))] + train_subset = Subset(train_val_split["train"], train_indices) + val_indices = torch.randperm(len(train_val_split["test"]))[:int(0.01 * len(train_val_split["test"]))] + val_subset = Subset(train_val_split["test"], val_indices) + train_loader = DataLoader(train_subset, batch_size=args.per_device_train_batch_size, shuffle=True) + val_loader = DataLoader(val_subset, batch_size=args.per_device_eval_batch_size) + + # train_val_split = processed_datasets["train"].train_test_split(test_size=0.2, shuffle=True) + # train_loader = DataLoader(train_val_split["train"], batch_size=args.per_device_train_batch_size, shuffle=True) + # val_loader = 
DataLoader(train_val_split["test"], batch_size=args.per_device_eval_batch_size) + # test_loader = DataLoader(processed_datasets["test"], batch_size=args.per_device_eval_batch_size) + + model, train_loader, val_loader = accelerator.prepare(model, train_loader, val_loader) + + optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) + lr_scheduler = transformers.get_scheduler( + name=transformers.SchedulerType.CONSTANT, + optimizer=optimizer, + num_warmup_steps=args.warmup_steps * accelerator.num_processes, + num_training_steps=args.num_train_epochs * math.ceil(len(train_loader) / args.gradient_accumulation_steps), + ) + loss_fn = causal_loss_wrapper(args.model_direction) + + add_attn_hooks(model, args.model_direction) + model.train() + optimizer.zero_grad() + + wandb.require("core") + accelerator.init_trackers( + project_name="NLP-Class-Project", + config=vars(args) | {"model_parameters": sum(p.numel() for p in model.parameters())}, + init_kwargs={"wandb": {"entity": "frostbyte"}} + ) + + global_step = 0 # unaccumulated steps + past_losses = [] + for epoch in tqdm(range(args.num_train_epochs), position=0, leave=True, desc="Epoch"): + for step, batch in enumerate(tqdm(train_loader, position=1, leave=False, desc="Train Iteration")): + with accelerator.accumulate(model): + labels = batch.pop("labels") + outputs = model(**batch) + loss = loss_fn(outputs.logits, labels) + accelerator.backward(loss) + + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + + past_losses.append(loss.item()) + if (global_step + 1) % args.logging_steps == 0: + avg_train_loss = torch.tensor(past_losses).mean().item() # Assuming 1 GPU + accelerator.log({ + "train_loss": avg_train_loss, + "learning_rate": lr_scheduler.get_last_lr()[0], + }) + past_losses.clear() + + if (global_step + 1) % args.eval_steps == 0: + val_loss_sum = val_examples = 0 + model.eval() + for val_batch in tqdm(val_loader, position=2, leave=False, desc="Val 
Iteration"): + labels = val_batch.pop("labels") + with torch.no_grad(): + outputs = model(**val_batch) + + loss = loss_fn(outputs.logits, labels) + + batch_size = labels.size(0) + val_loss_sum += loss.item() * batch_size + val_examples += batch_size + + accelerator.log({"val_loss": val_loss_sum / val_examples}, + log_kwargs={"wandb": {"commit": False}}) + model.train() + + if ((step + 1) % args.gradient_accumulation_steps == 0) or step == (len(train_loader) - 1): + global_step += 1 + + model.save_pretrained(os.path.join(args.output_dir, f"epoch_{epoch}_checkpt")) + + +if __name__ == "__main__": + main() diff --git a/finetune_bert.py b/finetune_bert.py index 59c8090..da29af9 100644 --- a/finetune_bert.py +++ b/finetune_bert.py @@ -1,663 +1,330 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2020 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. """ -Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset. 
-Here is the full list of checkpoints on the hub that can be fine-tuned by this script: -https://huggingface.co/models?filter=text-generation +# BERT base +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction rtl \ +--model_name bert-base-uncased \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/bert_base_rtl/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction ltr \ +--model_name bert-base-uncased \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/bert_base_ltr/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + +# DistilBERT scratch +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction rtl \ +--model_name distilbert/distilbert-base-uncased \ +--train_from_scratch \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/distilbert_base_rtl_scratch/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction ltr \ +--model_name distilbert/distilbert-base-uncased \ +--train_from_scratch \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/distilbert_base_ltr_scratch/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + +# DistilBERT base +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction rtl \ +--model_name distilbert/distilbert-base-uncased \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 128 
\ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/distilbert_base_rtl/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + + +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction ltr \ +--model_name distilbert/distilbert-base-uncased \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 128 \ +--per_device_eval_batch_size 128 \ +--output_dir checkpoints/distilbert_base_ltr/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + +# BERT large +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction rtl \ +--model_name bert-large-uncased \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 64 \ +--gradient_accumulation_steps 2 \ +--per_device_eval_batch_size 64 \ +--output_dir checkpoints/bert_large_rtl/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + + +accelerate launch --mixed_precision bf16 finetune_bert.py \ +--model_direction ltr \ +--model_name bert-large-uncased \ +--warmup_steps 500 \ +--learning_rate 5e-5 \ +--per_device_train_batch_size 64 \ +--gradient_accumulation_steps 2 \ +--per_device_eval_batch_size 64 \ +--output_dir checkpoints/bert_large_ltr/ \ +--eval_steps 899 \ +--block_size 128 \ +--num_train_epochs 4 \ +--weight_decay 1e-4 + +for size in 35 19 11 6; do + for dir in ltr rtl; do + accelerate launch --mixed_precision bf16 finetune_bert.py \ + --model_direction $dir \ + --model_name bert-base-uncased \ + --model_config "configs/bert_${size}M.json" \ + --train_from_scratch \ + --warmup_steps 500 \ + --learning_rate 5e-5 \ + --per_device_train_batch_size 128 \ + --per_device_eval_batch_size 128 \ + --output_dir "checkpoints/bert_${size}_${dir}_scratch/" \ + --eval_steps 899 \ + --block_size 128 \ + --num_train_epochs 4 \ + --weight_decay 1e-4 + done +done + +for seed in 0 1 2 3 4; do + for dir in ltr rtl; do + accelerate 
launch --mixed_precision bf16 finetune_bert.py \ + --model_direction $dir \ + --model_name bert-base-uncased \ + --model_config "configs/bert_${size}M.json" \ + --train_from_scratch \ + --warmup_steps 500 \ + --learning_rate 5e-5 \ + --per_device_train_batch_size 128 \ + --per_device_eval_batch_size 128 \ + --output_dir "checkpoints/overwritable_temp/" \ + --eval_steps 899 \ + --block_size 128 \ + --num_train_epochs 1 \ + --weight_decay 1e-4 \ + --seed $seed + done +done """ -# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments. -""" -From https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_clm.py -""" - -import logging +import argparse import math import os -import sys -from dataclasses import dataclass, field -from itertools import chain -from typing import Optional -import datasets -import evaluate +import accelerate import torch -from datasets import load_dataset - import transformers -from transformers import ( - CONFIG_MAPPING, - MODEL_FOR_CAUSAL_LM_MAPPING, - AutoConfig, - AutoModelForCausalLM, - AutoTokenizer, - HfArgumentParser, - Trainer, - TrainingArguments, - default_data_collator, - is_torch_xla_available, - set_seed, -) -from transformers.testing_utils import CaptureLogger -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, send_example_telemetry -from transformers.utils.versions import require_version - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
-check_min_version("4.47.0.dev0") - -require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") - -logger = logging.getLogger(__name__) - - -MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys()) -MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch. - """ - # text_direction: str = field( - # - # ) - model_name_or_path: Optional[str] = field( - default=None, - metadata={ - "help": ( - "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." - ) - }, - ) - # model_type: Optional[str] = field( - # default=None, - # metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}, - # ) - config_overrides: Optional[str] = field( - default=None, - metadata={ - "help": ( - "Override some existing default config settings when a model is trained from scratch. 
Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" - ) - }, - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - use_fast_tokenizer: bool = field( - default=True, - metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - token: str = field( - default=None, - metadata={ - "help": ( - "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " - "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." - ) - }, - ) - trust_remote_code: bool = field( - default=False, - metadata={ - "help": ( - "Whether to trust the execution of code from datasets/models defined on the Hub." - " This option should only be set to `True` for repositories you trust and in which you have read the" - " code, as it will execute code present on the Hub on your local machine." - ) - }, - ) - torch_dtype: Optional[str] = field( - default=None, - metadata={ - "help": ( - "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the " - "dtype will be automatically derived from the model's weights." 
- ), - "choices": ["auto", "bfloat16", "float16", "float32"], - }, - ) - low_cpu_mem_usage: bool = field( - default=False, - metadata={ - "help": ( - "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. " - "set True will benefit LLM loading time and RAM consumption." - ) - }, - ) +import wandb +from datasets import load_dataset +from torch.utils.data import DataLoader +from tqdm.auto import tqdm +from transformers import set_seed - def __post_init__(self): - if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): - raise ValueError( - "--config_overrides can't be used in combination with --config_name or --model_name_or_path" - ) +from utils import preprocess_datasets, convert_to_torch_dataset, add_attn_hooks, causal_loss_wrapper -@dataclass -class DataTrainingArguments: +def parse_args(): """ - Arguments pertaining to what data we are going to input our model for training and eval. + Re-using HuggingFace arguments when possible (most of the help strings are directly copied). 
+ https://github.com/huggingface/transformers/blob/7bbc62474391aff64f63fcc064c975752d1fa4de/examples/pytorch/language-modeling/run_clm.py#L75 """ - - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - ) - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - ) - }, - ) - streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"}) - block_size: Optional[int] = field( - default=None, - metadata={ - "help": ( - "Optional input sequence length after tokenization. " - "The training dataset will be truncated in block of this size for training. " - "Default to the model max input length for single sentence inputs (take into account special tokens)." 
- ) - }, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + parser = argparse.ArgumentParser() + + # Model + parser.add_argument("--model_direction", type=str, required=True, choices=["ltr", "rtl"], + help="Whether to train a left-to-right or right-to-left LM.") + parser.add_argument("--model_config", type=str, + help="Path to model config json, from which to train_from_scratch.") + parser.add_argument("--model_name", type=str, required=True, + help="Name of tokenizer to load. " + "If model_config is not specified, will also load model architecture." + "If not training from scratch, will also load model weights.") + + # Data + parser.add_argument("--dataset_name", type=str, default="Salesforce/wikitext", + help="The name of the dataset to use (via the datasets library).") + parser.add_argument("--dataset_config_name", type=str, default="wikitext-103-v1", + help="The configuration name of the dataset to use (via the datasets library).") + # TODO: block_size, train on shorter seqs? + parser.add_argument( + "--block_size", + type=int, + help="Optional input sequence length after tokenization. " + "The training dataset will be truncated in block of this size for training. " + "Default to the model max input length for single sentence inputs (take into account special tokens)." 
) - validation_split_percentage: Optional[int] = field( - default=5, - metadata={ - "help": "The percentage of the train set used as validation set in case there's no validation split" - }, - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - keep_linebreaks: bool = field( - default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."} - ) - - def __post_init__(self): - if self.streaming: - require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`") - if self.dataset_name is None and self.train_file is None and self.validation_file is None: - raise ValueError("Need either a dataset name or a training/validation file.") - else: - if self.train_file is not None: - extension = self.train_file.split(".")[-1] - assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file." - if self.validation_file is not None: - extension = self.validation_file.split(".")[-1] - assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file." 
+ # Training + parser.add_argument("--train_from_scratch", action="store_true") + parser.add_argument("--output_dir", type=str, required=True, + help="The output directory where the model predictions and checkpoints will be written.") + parser.add_argument("--per_device_train_batch_size", type=int, default=8) + parser.add_argument("--per_device_eval_batch_size", type=int, default=16) + parser.add_argument("--gradient_accumulation_steps", type=int, default=1) + parser.add_argument("--num_train_epochs", type=int, default=1) + parser.add_argument("--learning_rate", type=float, required=True) + parser.add_argument("--warmup_steps", type=int, default=0) + parser.add_argument("--weight_decay", type=float, default=0.0) + parser.add_argument("--logging_steps", type=int, default=1, + help="Number of update steps between two logs.") + parser.add_argument("--eval_steps", type=int, default=20000, + help="Number of update steps between two logs.") + parser.add_argument("--dataloader_num_workers", type=int, default=8) + parser.add_argument("--seed", type=int, default=42, help="Random seed.") + + args = parser.parse_args() + + return args def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() + args = parse_args() - # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. 
The - # information sent is the one passed as arguments along with your Python/PyTorch versions. - send_example_telemetry("run_clm", model_args, data_args) + accelerator = accelerate.Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps, log_with="wandb", project_dir=args.output_dir) + set_seed(args.seed) - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - if training_args.should_log: - # The default of training_args.log_level is passive, so we set log level at info here to have that default. - transformers.utils.logging.set_verbosity_info() - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " - + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." 
- ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) - # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ - # (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called - # 'text' is found. You can easily tweak this behavior (see below). - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. 
- raw_datasets = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - cache_dir=model_args.cache_dir, - token=model_args.token, - streaming=data_args.streaming, - trust_remote_code=model_args.trust_remote_code, - ) - if "validation" not in raw_datasets.keys(): - raw_datasets["validation"] = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - split=f"train[:{data_args.validation_split_percentage}%]", - cache_dir=model_args.cache_dir, - token=model_args.token, - streaming=data_args.streaming, - trust_remote_code=model_args.trust_remote_code, - ) - raw_datasets["train"] = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - split=f"train[{data_args.validation_split_percentage}%:]", - cache_dir=model_args.cache_dir, - token=model_args.token, - streaming=data_args.streaming, - trust_remote_code=model_args.trust_remote_code, - ) - else: - data_files = {} - dataset_args = {} - if data_args.train_file is not None: - data_files["train"] = data_args.train_file - if data_args.validation_file is not None: - data_files["validation"] = data_args.validation_file - extension = ( - data_args.train_file.split(".")[-1] - if data_args.train_file is not None - else data_args.validation_file.split(".")[-1] - ) - if extension == "txt": - extension = "text" - dataset_args["keep_linebreaks"] = data_args.keep_linebreaks - raw_datasets = load_dataset( - extension, - data_files=data_files, - cache_dir=model_args.cache_dir, - token=model_args.token, - **dataset_args, - ) - # If no validation data is there, validation_split_percentage will be used to divide the dataset. 
- if "validation" not in raw_datasets.keys(): - raw_datasets["validation"] = load_dataset( - extension, - data_files=data_files, - split=f"train[:{data_args.validation_split_percentage}%]", - cache_dir=model_args.cache_dir, - token=model_args.token, - **dataset_args, - ) - raw_datasets["train"] = load_dataset( - extension, - data_files=data_files, - split=f"train[{data_args.validation_split_percentage}%:]", - cache_dir=model_args.cache_dir, - token=model_args.token, - **dataset_args, - ) - - # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at - # https://huggingface.co/docs/datasets/loading_datasets. - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - - config_kwargs = { - "cache_dir": model_args.cache_dir, - "revision": model_args.model_revision, - "token": model_args.token, - "trust_remote_code": model_args.trust_remote_code, - } - if model_args.config_name: - config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) - elif model_args.model_name_or_path: - config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) - else: - config = CONFIG_MAPPING[model_args.model_type]() - logger.warning("You are instantiating a new config instance from scratch.") - if model_args.config_overrides is not None: - logger.info(f"Overriding config: {model_args.config_overrides}") - config.update_from_string(model_args.config_overrides) - logger.info(f"New config: {config}") - - tokenizer_kwargs = { - "cache_dir": model_args.cache_dir, - "use_fast": model_args.use_fast_tokenizer, - "revision": model_args.model_revision, - "token": model_args.token, - "trust_remote_code": model_args.trust_remote_code, - } - if model_args.tokenizer_name: - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) - elif 
model_args.model_name_or_path: - tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs) - else: - raise ValueError( - "You are instantiating a new tokenizer from scratch. This is not supported by this script. " - "You can do it from another script, save it, and load it from here, using --tokenizer_name." - ) - - if model_args.model_name_or_path: - torch_dtype = ( - model_args.torch_dtype - if model_args.torch_dtype in ["auto", None] - else getattr(torch, model_args.torch_dtype) - ) - model = AutoModelForCausalLM.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - token=model_args.token, - trust_remote_code=model_args.trust_remote_code, - torch_dtype=torch_dtype, - low_cpu_mem_usage=model_args.low_cpu_mem_usage, - ) - else: - model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code) - n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") - - # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch - # on a small vocab and want a smaller embedding size, remove this test. - embedding_size = model.get_input_embeddings().weight.shape[0] - if len(tokenizer) > embedding_size: - model.resize_token_embeddings(len(tokenizer)) - - # Preprocessing the datasets. - # First we tokenize all the texts. 
- if training_args.do_train: - column_names = list(raw_datasets["train"].features) - else: - column_names = list(raw_datasets["validation"].features) - text_column_name = "text" if "text" in column_names else column_names[0] - - # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function - tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base") - - def tokenize_function(examples): - with CaptureLogger(tok_logger) as cl: - output = tokenizer(examples[text_column_name]) - # clm input could be much much longer than block_size - if "Token indices sequence length is longer than the" in cl.out: - tok_logger.warning( - "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" - " before being passed to the model." - ) - return output - - with training_args.main_process_first(desc="dataset map tokenization"): - if not data_args.streaming: - tokenized_datasets = raw_datasets.map( - tokenize_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on dataset", - ) - else: - tokenized_datasets = raw_datasets.map( - tokenize_function, - batched=True, - remove_columns=column_names, - ) - if hasattr(config, "max_position_embeddings"): - max_pos_embeddings = config.max_position_embeddings + # Will `add_attn_hooks` to `model` later + if args.model_config is not None: + assert args.train_from_scratch, "Expected to train from scratch when model_config is specified." + config = transformers.AutoConfig.from_pretrained(args.model_config) + model = transformers.AutoModelForMaskedLM.from_config(config) else: - # Define a default value if the attribute is missing in the config. 
- max_pos_embeddings = 1024 - - if data_args.block_size is None: - block_size = tokenizer.model_max_length - if block_size > max_pos_embeddings: - logger.warning( - f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). " - f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx." - ) - if max_pos_embeddings > 0: - block_size = min(1024, max_pos_embeddings) - else: - block_size = 1024 - else: - if data_args.block_size > tokenizer.model_max_length: - logger.warning( - f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model " - f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}." - ) - block_size = min(data_args.block_size, tokenizer.model_max_length) - - # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. - def group_texts(examples): - # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} - total_length = len(concatenated_examples[list(examples.keys())[0]]) - # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict. - # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. - total_length = (total_length // block_size) * block_size - # Split by chunks of max_len. - result = { - k: [t[i : i + block_size] for i in range(0, total_length, block_size)] - for k, t in concatenated_examples.items() - } - result["labels"] = result["input_ids"].copy() - return result - - # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder - # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower - # to preprocess. - # - # To speed up this part, we use multiprocessing. 
See the documentation of the map method for more information: - # https://huggingface.co/docs/datasets/process#map - - with training_args.main_process_first(desc="grouping texts together"): - if not data_args.streaming: - lm_datasets = tokenized_datasets.map( - group_texts, - batched=True, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - desc=f"Grouping texts in chunks of {block_size}", - ) - else: - lm_datasets = tokenized_datasets.map( - group_texts, - batched=True, - ) - - if training_args.do_train: - if "train" not in tokenized_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = lm_datasets["train"] - if data_args.max_train_samples is not None: - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - - if training_args.do_eval: - if "validation" not in tokenized_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = lm_datasets["validation"] - if data_args.max_eval_samples is not None: - max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) - eval_dataset = eval_dataset.select(range(max_eval_samples)) - - def preprocess_logits_for_metrics(logits, labels): - if isinstance(logits, tuple): - # Depending on the model and config, logits may contain extra tensors, - # like past_key_values, but logits always come first - logits = logits[0] - return logits.argmax(dim=-1) - - metric = evaluate.load("accuracy", cache_dir=model_args.cache_dir) - - def compute_metrics(eval_preds): - preds, labels = eval_preds - # preds have the same shape as the labels, after the argmax(-1) has been calculated - # by preprocess_logits_for_metrics but we need to shift the labels - labels = labels[:, 1:].reshape(-1) - preds = preds[:, :-1].reshape(-1) - return metric.compute(predictions=preds, references=labels) - - # Initialize our Trainer - trainer = Trainer( - 
model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - processing_class=tokenizer, - # Data collator will default to DataCollatorWithPadding, so we change it. - data_collator=default_data_collator, - compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None, - preprocess_logits_for_metrics=preprocess_logits_for_metrics - if training_args.do_eval and not is_torch_xla_available() - else None, + # Load model weights in both cases, but re-initialize if training from scratch + model = transformers.AutoModelForMaskedLM.from_pretrained(args.model_name, attn_implementation="sdpa") + + if args.train_from_scratch: + model.apply(model._initialize_weights) + model.tie_weights() # probably not applicable + + tokenizer = transformers.AutoTokenizer.from_pretrained(args.model_name) + + # Data + raw_datasets = load_dataset(args.dataset_name, args.dataset_config_name) + block_size = args.block_size if args.block_size is not None else model.config.max_position_embeddings + model.config.max_position_embeddings = block_size + + processed_datasets = preprocess_datasets(raw_datasets, tokenizer, block_size) + for split, hf_dataset in processed_datasets.items(): + processed_datasets[split] = convert_to_torch_dataset(hf_dataset) + + train_loader = DataLoader(processed_datasets["train"], batch_size=args.per_device_train_batch_size, shuffle=True) + val_loader = DataLoader(processed_datasets["validation"], batch_size=args.per_device_eval_batch_size) + # test_loader = DataLoader(processed_datasets["test"], batch_size=args.per_device_eval_batch_size) + model, train_loader, val_loader = accelerator.prepare(model, train_loader, val_loader) + + optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) + lr_scheduler = transformers.get_scheduler( + name=transformers.SchedulerType.COSINE, + 
optimizer=optimizer, + num_warmup_steps=args.warmup_steps * accelerator.num_processes, + num_training_steps=args.num_train_epochs * math.ceil(len(train_loader) / args.gradient_accumulation_steps), ) + loss_fn = causal_loss_wrapper(args.model_direction) - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - trainer.save_model() # Saves the tokenizer too for easy upload - - metrics = train_result.metrics - - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - - metrics = trainer.evaluate() - - max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - try: - perplexity = math.exp(metrics["eval_loss"]) - except OverflowError: - perplexity = float("inf") - metrics["perplexity"] = perplexity - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name - - if training_args.push_to_hub: - trainer.push_to_hub(**kwargs) - else: - 
trainer.create_model_card(**kwargs) + add_attn_hooks(model, args.model_direction) + model.train() + optimizer.zero_grad() + wandb.require("core") + accelerator.init_trackers( + project_name="NLP-Class-Project", + config=vars(args) | {"model_parameters": sum(p.numel() for p in model.parameters())}, + init_kwargs={"wandb": {"entity": "frostbyte"}} + ) -def _mp_fn(index): - # For xla_spawn (TPUs) - main() + global_step = 0 # unaccumulated steps + past_losses = [] + for epoch in tqdm(range(args.num_train_epochs), position=0, leave=True, desc="Epoch"): + for step, batch in enumerate(tqdm(train_loader, position=1, leave=False, desc="Train Iteration")): + with accelerator.accumulate(model): + labels = batch.pop("labels") + outputs = model(**batch) + loss = loss_fn(outputs.logits, labels) + accelerator.backward(loss) + + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + + past_losses.append(loss.item()) + if (global_step + 1) % args.logging_steps == 0: + avg_train_loss = torch.tensor(past_losses).mean().item() # Assuming 1 GPU + accelerator.log({ + "train_loss": avg_train_loss, + "learning_rate": lr_scheduler.get_last_lr()[0], + }) + past_losses.clear() + + if (global_step + 1) % args.eval_steps == 0: + val_loss_sum = val_examples = 0 + model.eval() + for val_batch in tqdm(val_loader, position=2, leave=False, desc="Val Iteration"): + labels = val_batch.pop("labels") + with torch.no_grad(): + outputs = model(**val_batch) + + loss = loss_fn(outputs.logits, labels) + + batch_size = labels.size(0) + val_loss_sum += loss.item() * batch_size + val_examples += batch_size + + accelerator.log({"val_loss": val_loss_sum / val_examples}, + log_kwargs={"wandb": {"commit": False}}) + model.train() + + if ((step + 1) % args.gradient_accumulation_steps == 0) or step == (len(train_loader) - 1): + global_step += 1 + + model.save_pretrained(os.path.join(args.output_dir, f"epoch_{epoch}_checkpt")) if __name__ == "__main__": - main()
\ No newline at end of file + main() diff --git a/notebooks/Inference.ipynb b/notebooks/Inference.ipynb new file mode 100644 index 0000000..670a127 --- /dev/null +++ b/notebooks/Inference.ipynb @@ -0,0 +1,638 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "34c536f2-3ccb-4df0-bd47-913d6ef040a2", + "metadata": {}, + "source": [ + "# Inference" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "execution_state": "idle", + "id": "3c6381c4-2a02-415f-a5f1-450fe42b30d3", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/qa_distilbert_base_ltr_v2/best_checkpt were not used when initializing DistilBertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" + ] + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"..\")\n", + "\n", + "import torch\n", + "import transformers\n", + "\n", + "from utils import add_attn_hooks\n", + "\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr/epoch_3_checkpt\", ignore_mismatched_sizes=True)\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/qa_distilbert_base_ltr_overfit/epoch_999_checkpt\", ignore_mismatched_sizes=True)\n", + "# model = 
transformers.AutoModelForMaskedLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/qa_distilbert_base_ltr/epoch_49_checkpt\", ignore_mismatched_sizes=True)\n", + "model = transformers.AutoModelForMaskedLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/qa_distilbert_base_ltr_v2/best_checkpt\", ignore_mismatched_sizes=True)\n", + "\n", + "# tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/bert_base_ltr/epoch_3_checkpt\", ignore_mismatched_sizes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "execution_state": "idle", + "id": "41edf867-7f6a-4d44-871c-8af0b7af7543", + "metadata": {}, + "outputs": [], + "source": [ + "from utils import add_attn_hooks\n", + "add_attn_hooks(model, \"ltr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "execution_state": "idle", + "id": "d79a3d16-b982-42ed-9b6a-fad8328e177e", + "metadata": {}, + "outputs": [], + "source": [ + "model.eval();" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "execution_state": "idle", + "id": "f06d4bc2-9df8-42c2-9397-3bfb3728da0b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/qa_distilbert_base_ltr_overfit/epoch_999_checkpt were not used when initializing DistilBertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" + ] + } + ], + "source": [ + "from typing import override\n", + "class DecoderMLM(transformers.AutoModelForMaskedLM, transformers.GenerationMixin):\n", + " @override\n", + " # @classmethod\n", + " def can_generate(cls):\n", + " return True\n", + "\n", + "model2 = DecoderMLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/qa_distilbert_base_ltr_overfit/epoch_999_checkpt\", ignore_mismatched_sizes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "execution_state": "idle", + "id": "6feb4fdd-ae43-466d-8dce-a4f9a632a5e6", + "metadata": {}, + "outputs": [], + "source": [ + "# model2.can_generate = (lambda s: True)\n", + "model2.can_generate = (lambda: True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "execution_state": "idle", + "id": "c178761c-7124-42ed-9bfc-7ab0f782aad7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "execution_state": "idle", + "id": "3ab53852-f333-47ea-9e96-55266cda84a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model2.can_generate()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "execution_state": "idle", + "id": "a30f6240-f982-45b0-b75f-3be5bbb43049", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 25, + "execution_state": "idle", + "id": "2cefa784-a1cc-445a-8ffa-066e7cfccaf0", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "The current model class (DistilBertForMaskedLM) is not compatible with `.generate()`, as it doesn't have a language model head. Classes that support generation often end in one of these names: ['ForCausalLM', 'ForConditionalGeneration', 'ForSpeechSeq2Seq', 'ForVision2Seq'].", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[25], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m tokenized_question \u001b[38;5;241m=\u001b[39m tokenizer(question, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39minference_mode():\n\u001b[0;32m----> 5\u001b[0m 
\u001b[38;5;28mprint\u001b[39m(tokenizer\u001b[38;5;241m.\u001b[39mbatch_decode(\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtokenized_question\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput_ids\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_beams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdo_sample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1.\u001b[39;49m\u001b[43m)\u001b[49m))\n", + "File \u001b[0;32m~/.venv/lib64/python3.12/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/.venv/lib64/python3.12/site-packages/transformers/generation/utils.py:1967\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m 1882\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1883\u001b[0m \n\u001b[1;32m 1884\u001b[0m \u001b[38;5;124;03mGenerates sequences of token ids for models with a language modeling head.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1963\u001b[0m \u001b[38;5;124;03m - [`~generation.GenerateBeamEncoderDecoderOutput`]\u001b[39;00m\n\u001b[1;32m 1964\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1966\u001b[0m \u001b[38;5;66;03m# 1. Handle `generation_config` and kwargs that might update it, and validate the `.generate()` call\u001b[39;00m\n\u001b[0;32m-> 1967\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_model_class\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1968\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;66;03m# Pull this out first, we only use it for stopping criteria\u001b[39;00m\n\u001b[1;32m 1969\u001b[0m assistant_tokenizer \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124massistant_tokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;66;03m# only used for assisted generation\u001b[39;00m\n", + "File \u001b[0;32m~/.venv/lib64/python3.12/site-packages/transformers/generation/utils.py:1269\u001b[0m, in 
\u001b[0;36mGenerationMixin._validate_model_class\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1262\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torchdynamo_compiling() \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcan_generate():\n\u001b[1;32m 1263\u001b[0m terminations_with_generation_support \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1264\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mForCausalLM\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mForConditionalGeneration\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1266\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mForSpeechSeq2Seq\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1267\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mForVision2Seq\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1268\u001b[0m ]\n\u001b[0;32m-> 1269\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe current model class (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) is not compatible with `.generate()`, as \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mit doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a language model head. 
Classes that support generation often end in one of these \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mterminations_with_generation_support\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1273\u001b[0m )\n", + "\u001b[0;31mTypeError\u001b[0m: The current model class (DistilBertForMaskedLM) is not compatible with `.generate()`, as it doesn't have a language model head. Classes that support generation often end in one of these names: ['ForCausalLM', 'ForConditionalGeneration', 'ForSpeechSeq2Seq', 'ForVision2Seq']." + ] + } + ], + "source": [ + "question = \"Answer: Grapes are toxic to foxes in large quantities\"\n", + "tokenized_question = tokenizer(question, return_tensors=\"pt\")\n", + "\n", + "with torch.inference_mode():\n", + " print(tokenizer.batch_decode(model.generate(tokenized_question[\"input_ids\"][:, :-1], num_beams=5, do_sample=True, temperature=1.)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c475851e-7c86-46fd-b4ad-cd51caa6e7b8", + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Apples are red and \"\n", + "tokenized_question = tokenizer(question, return_tensors=\"pt\")\n", + "\n", + "with torch.inference_mode():\n", + " print(tokenizer.batch_decode(model.generate(tokenized_question[\"input_ids\"][:, :-1], num_beams=5, do_sample=True)))" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "execution_state": "idle", + "id": "1365ab75-a022-42fe-9168-d49a645af0d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'answer : if you go outdoors in cold weather with wet hair, your hair may freeze [SEP] [CLS] question :'" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokenizer.decode([3437, 1024, 2065, 2017, 2175, 19350, 1999, 
3147, 4633, 2007,\n", + " 4954, 2606, 1010, 2115, 2606, 2089, 13184, 102, 101, 3160,\n", + " 1024])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "execution_state": "idle", + "id": "58bffbb3-77fb-4f57-a77e-303fca05a84f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated token: what\n", + "Generated token: is\n", + "Generated token: the\n", + "Generated token: chicken\n", + "Generated token: cry\n", + "Generated token: ?\n", + "Generated token: [SEP]\n", + "Generated token: [CLS]\n", + "Generated token: question\n", + "Generated token: :\n", + "Generated token: what\n", + "Generated token: is\n", + "Generated token: the\n", + "Generated token: chicken\n", + "Generated token: cry\n", + "Generated token: ?\n", + "Generated token: [SEP]\n", + "Generated token: [CLS]\n", + "Generated token: question\n", + "Generated token: :\n", + "Generated token: what\n", + "Generated token: is\n", + "Generated token: the\n", + "Generated token: chicken\n", + "Generated token: cry\n", + "Generated token: ?\n", + "Generated token: [SEP]\n", + "Generated token: [CLS]\n", + "Generated token: what\n", + "Generated token: is\n" + ] + } + ], + "source": [ + "# question = \"answer : grapes are toxic to foxes in large quantities. [SEP] [CLS] question :\"\n", + "question = \"answer : your chicken cries. 
[SEP] [CLS] question :\"\n", + "# question = \"answer : if you go outdoors in cold weather with wet hair, your hair may freeze [SEP] [CLS] question :\"\n", + "\n", + "# input_ids = tokenizer(question, return_tensors=\"pt\").input_ids[:, :-1]\n", + "input_ids = tokenizer(question, return_tensors=\"pt\", add_special_tokens=False).input_ids\n", + "#tokenized_question = {\n", + "# \"input_ids\": torch.tensor([[3437, 1024, 2498, 6433, 102, 101, 3160, 1024]], dtype=torch.long)\n", + "#}\n", + "# input_ids = torch.tensor([[3437, 1024, 2065, 2017, 2175, 19350, 1999, 3147, 4633, 2007,\n", + "# 4954, 2606, 1010, 2115, 2606, 2089, 13184, 102, 101, 3160,\n", + "# 1024]])\n", + "\n", + "with torch.no_grad():\n", + " for i in range(30):\n", + " # Get logits for the last token in the sequence\n", + " logits = model(input_ids).logits[0, -1, :]\n", + " \n", + " # Select the token with the highest probability\n", + " next_token_id = torch.argmax(logits).item()\n", + " \n", + " # Update the input_ids with the new token\n", + " input_ids = torch.cat([input_ids, torch.tensor([[next_token_id]])], dim=1)\n", + " \n", + " # Decode the next token for readability\n", + " next_token = tokenizer.decode(next_token_id)\n", + " \n", + " print(f\"Generated token: {next_token}\")\n", + "# but we did train on incorrect answers" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "execution_state": "idle", + "id": "972352ea-4c28-42d9-a834-26daa83b2290", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated token: how\n", + "Generated token: long\n", + "Generated token: should\n", + "Generated token: you\n", + "Generated token: wait\n", + "Generated token: before\n", + "Generated token: filing\n", + "Generated token: a\n", + "Generated token: missing\n", + "Generated token: person\n", + "Generated token: report\n", + "Generated token: ?\n", + "Generated token: [SEP]\n", + "Generated token: [CLS]\n", + "Generated token: question\n", + 
"Generated token: :\n", + "Generated token: how\n", + "Generated token: long\n", + "Generated token: should\n", + "Generated token: you\n", + "Generated token: wait\n", + "Generated token: before\n", + "Generated token: filing\n", + "Generated token: a\n", + "Generated token: missing\n", + "Generated token: person\n", + "Generated token: report\n", + "Generated token: ?\n", + "Generated token: [SEP]\n", + "Generated token: [CLS]\n" + ] + } + ], + "source": [ + "# question = \"answer : grapes are toxic to foxes in large quantities. [SEP] [CLS] question :\"\n", + "question = \"answer : you are late to work. [SEP] [CLS] question :\"\n", + "# question = \"answer : if you go outdoors in cold weather with wet hair, your hair may freeze [SEP] [CLS] question :\"\n", + "\n", + "# input_ids = tokenizer(question, return_tensors=\"pt\").input_ids[:, :-1]\n", + "input_ids = tokenizer(question, return_tensors=\"pt\", add_special_tokens=False).input_ids\n", + "#tokenized_question = {\n", + "# \"input_ids\": torch.tensor([[3437, 1024, 2498, 6433, 102, 101, 3160, 1024]], dtype=torch.long)\n", + "#}\n", + "# input_ids = torch.tensor([[3437, 1024, 2065, 2017, 2175, 19350, 1999, 3147, 4633, 2007,\n", + "# 4954, 2606, 1010, 2115, 2606, 2089, 13184, 102, 101, 3160,\n", + "# 1024]])\n", + "\n", + "with torch.no_grad():\n", + " for i in range(30):\n", + " # Get logits for the last token in the sequence\n", + " logits = model(input_ids).logits[0, -1, :]\n", + " \n", + " # Select the token with the highest probability\n", + " next_token_id = torch.argmax(logits).item()\n", + " \n", + " # Update the input_ids with the new token\n", + " input_ids = torch.cat([input_ids, torch.tensor([[next_token_id]])], dim=1)\n", + " \n", + " # Decode the next token for readability\n", + " next_token = tokenizer.decode(next_token_id)\n", + " \n", + " print(f\"Generated token: {next_token}\")\n", + "# but we did train on incorrect answers" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + 
"execution_state": "idle", + "id": "6b1949d3-343d-49ba-b50e-79dc36d9124e", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'DistilBertForMaskedLM' object has no attribute 'tokenizer'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[29], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtokenizer\u001b[49m\n", + "File \u001b[0;32m~/.venv/lib64/python3.12/site-packages/torch/nn/modules/module.py:1931\u001b[0m, in \u001b[0;36mModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m modules:\n\u001b[1;32m 1930\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m modules[name]\n\u001b[0;32m-> 1931\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[1;32m 1932\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1933\u001b[0m )\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DistilBertForMaskedLM' object has no attribute 'tokenizer'" + ] + } + ], + "source": [ + "model.tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "execution_state": "idle", + "id": "f57b41c0-2056-4164-914b-f298ad66c0c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Generated token: [CLS]\n", + "Generated token: the\n", + "Generated token: vampires\n", + "Generated token: are\n", + "Generated token: the\n", + "Generated token: vampires\n", + "Generated token: ,\n", + "Generated token: vampires\n", + "Generated token: are\n", + "Generated token: living\n", + "Generated token: ,\n", + "Generated token: who\n", + "Generated token: believe\n", + "Generated token: the\n", + "Generated token: vampires\n", + "Generated token: .\n", + "Generated token: vampire\n", + "Generated token: ,\n", + "Generated token: and\n", + "Generated token: who\n", + "Generated token: are\n", + "Generated token: vampires\n", + "Generated token: ,\n", + "Generated token: who\n", + "Generated token: are\n", + "Generated token: also\n", + "Generated token: .\n", + "Generated token: vampires\n", + "Generated token: who\n", + "Generated token: do\n" + ] + } + ], + "source": [ + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "question = \"Answer: Vampires are real. 
Question:\"\n", + "tokenized_question = tokenizer(question, return_tensors=\"pt\")\n", + "\n", + "temperature = 0.7 # Set your temperature here (e.g., 0.7 for less randomness)\n", + "\n", + "with torch.no_grad():\n", + " for i in range(30):\n", + " # Get logits for the last token in the sequence\n", + " logits = model(tokenized_question[\"input_ids\"]).logits[0, -1, :]\n", + " \n", + " # Apply temperature scaling\n", + " logits = logits / temperature\n", + " \n", + " # Convert logits to probabilities using softmax\n", + " probs = F.softmax(logits, dim=-1)\n", + " \n", + " # Sample from the distribution\n", + " next_token_id = torch.multinomial(probs, num_samples=1).item()\n", + " \n", + " # Update the input_ids with the new token\n", + " tokenized_question['input_ids'] = torch.cat([tokenized_question['input_ids'], torch.tensor([[next_token_id]])], dim=1)\n", + " \n", + " # Decode the next token for readability\n", + " next_token = tokenizer.decode(next_token_id)\n", + " \n", + " print(f\"Generated token: {next_token}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "execution_state": "idle", + "id": "e8bffe5d-d830-4992-9381-b484672ffeda", + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "too many indices for tensor of dimension 1", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[129], line 35\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# Create candidates\u001b[39;00m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(beam_width):\n\u001b[0;32m---> 35\u001b[0m token_id \u001b[38;5;241m=\u001b[39m \u001b[43mtop_k_ids\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mitem()\n\u001b[1;32m 36\u001b[0m token_prob \u001b[38;5;241m=\u001b[39m top_k_probs[\u001b[38;5;241m0\u001b[39m, i]\u001b[38;5;241m.\u001b[39mitem()\n\u001b[1;32m 38\u001b[0m \u001b[38;5;66;03m# Create a new sequence by appending the token to the existing sequence\u001b[39;00m\n", + "\u001b[0;31mIndexError\u001b[0m: too many indices for tensor of dimension 1" + ] + } + ], + "source": [ + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "question = \"Question: Are Vampires real. Answer:\"\n", + "tokenized_question = tokenizer(question, return_tensors=\"pt\")\n", + "\n", + "# Parameters\n", + "beam_width = 3 # The number of beams to consider\n", + "max_length = 30 # Maximum number of tokens to generate\n", + "temperature = 1.0 # Temperature for softmax\n", + "\n", + "# Initialize beams\n", + "beams = [(tokenized_question['input_ids'], 0.0)] # Each beam is a tuple (sequence, score)\n", + "finished_beams = []\n", + "\n", + "with torch.no_grad():\n", + " for step in range(max_length):\n", + " all_candidates = []\n", + " \n", + " for seq, score in beams:\n", + " # Get logits for the last token in the sequence\n", + " logits = model(input_ids=seq).logits[0, -1, :]\n", + " \n", + " # Apply temperature scaling\n", + " logits = logits / temperature\n", + " \n", + " # Convert logits to probabilities using softmax\n", + " probs = F.softmax(logits, dim=-1)\n", + " \n", + " # Get top-k candidate tokens and their probabilities\n", + " top_k_probs, top_k_ids = torch.topk(probs, beam_width, dim=-1)\n", + " \n", + " # Create candidates\n", + " for i in range(beam_width):\n", + " token_id = top_k_ids[0, i].item()\n", + " token_prob = top_k_probs[0, i].item()\n", + " \n", + " # Create a new sequence by appending the token to the existing sequence\n", + " new_seq = torch.cat([seq, torch.tensor([[token_id]])], dim=1)\n", + " \n", + " # Update the score (cumulative log probability)\n", + " 
new_score = score + torch.log(torch.tensor(token_prob))\n", + " \n", + " # If the token is the end-of-sequence token, consider it a finished beam\n", + " if token_id == tokenizer.eos_token_id:\n", + " finished_beams.append((new_seq, new_score))\n", + " else:\n", + " all_candidates.append((new_seq, new_score))\n", + " \n", + " # Sort candidates by score (highest first) and select top-k\n", + " beams = sorted(all_candidates, key=lambda x: x[1], reverse=True)[:beam_width]\n", + " \n", + " # If no beams are left to explore, break\n", + " if not beams:\n", + " break\n", + " \n", + " # If we have finished beams, we can stop early\n", + " if len(finished_beams) >= beam_width:\n", + " break\n", + "\n", + "# Decode and print the top finished sequences\n", + "for seq, score in sorted(finished_beams, key=lambda x: x[1], reverse=True):\n", + " generated_text = tokenizer.decode(seq[0], skip_special_tokens=True)\n", + " print(f\"Generated text: {generated_text}\")\n", + " print(f\"Score: {score.item()}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "218621c4-2b18-4e6e-8fba-adde97038ae8", + "metadata": {}, + "outputs": [], + "source": [ + "WANDB_MODE=offline CUDA_VISIBLE_DEVICES= accelerate launch --mixed_precision bf16 finetune_QA.py \\\n", + "--model_direction ltr \\\n", + "--checkpoint_path /home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr/epoch_3_checkpt \\\n", + "--tokenizer_name distilbert/distilbert-base-uncased \\\n", + "--warmup_steps 100 \\\n", + "--learning_rate 1e-5 \\\n", + "--per_device_train_batch_size 128 \\\n", + "--per_device_eval_batch_size 128 \\\n", + "--output_dir checkpoints/qa_distilbert_base_ltr/ \\\n", + "--eval_steps 38 \\\n", + "--block_size 128 \\\n", + "--num_train_epochs 50 \\\n", + "--weight_decay 1e-4\n", + "\n", + "yes but we overfit to this example\n", + "\n", + "oh wait we did actually" + ] + }, + { + "cell_type": "markdown", + "id": "f5e37c9a-156a-4fcc-bd05-5ae6c3671e62", + "metadata": {}, + "source": [ 
+ "wait so now it does really good because we're actually doing it the same way that we trained it?\n", + "\n", + "\n", + "\n", + "thats fine, let's train it on everything again!\n", + "\n", + "\n", + "i don't think that's true... but low perplexity!\n", + "\n", + "\n", + "\n", + "the question was supposed to be, \n", + "what happens if you swallow a watermellon seed?" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Riddles.ipynb b/notebooks/Riddles.ipynb new file mode 100644 index 0000000..c2c8309 --- /dev/null +++ b/notebooks/Riddles.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "execution_state": "idle", + "id": "1ddfc692-bda7-4d38-a549-2fb0d40d437d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_base_rtl/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertForMaskedLM were not initialized from the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_base_rtl/epoch_3_checkpt and are newly initialized because the shapes did not match:\n", + "- bert.embeddings.position_embeddings.weight: found shape torch.Size([512, 768]) in the checkpoint and torch.Size([128, 768]) in the model instantiated\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "import math\n", + "import sys\n", + "\n", + "sys.path.append(\"..\")\n", + "\n", + "import torch\n", + "import transformers\n", + "\n", + "from utils import add_attn_hooks\n", + "\n", + "# tokenizer = transformers.AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(\"/home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr/epoch_3_checkpt\", ignore_mismatched_sizes=True)\n", + "\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "text_dir = \"rtl\"\n", + "# text_dir = \"ltr\"\n", + "model = transformers.AutoModelForMaskedLM.from_pretrained(f\"/home/sipb/nlp-class-project/checkpoints/bert_base_{text_dir}/epoch_3_checkpt\", ignore_mismatched_sizes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "execution_state": "idle", + "id": "a732375b-1682-45c6-8df0-8db1458559c9", + "metadata": {}, + "outputs": [], + "source": [ + "add_attn_hooks(model, text_dir)\n", + "model.eval();" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "execution_state": "idle", + "id": 
"041d1702-5aaf-45f0-9413-4014b315d1ed", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"/home/sipb/nlp-class-project/data/riddles.txt\", \"r\") as f:\n", + " riddles = [line.rstrip() for line in f.readlines()]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "execution_state": "idle", + "id": "a4098975-2df6-4435-bc93-1a5afd6d7e68", + "metadata": {}, + "outputs": [], + "source": [ + "# CPU is fast enough\n", + "\n", + "ppls = []\n", + "for riddle in riddles:\n", + " batch = tokenizer([riddle], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].clone()\n", + " batch[\"labels\"][batch[\"attention_mask\"] == 0] = -100\n", + " # batch = tokenizer([riddle], return_tensors=\"pt\")#, padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"longest\", max_length=128)\n", + " # batch[\"labels\"] = batch[\"input_ids\"]\n", + " with torch.inference_mode():\n", + " output = model(**batch)\n", + " ppls.append(math.e ** output.loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "execution_state": "idle", + "id": "c4a82af4-d0d8-415a-9135-3a1350c1402e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(522.113471240328, 'rtl')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "execution_state": "idle", + "id": "84a95c66-6dd3-4ccb-96a2-96f38008f70e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1007.5656859988405, 'ltr')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "execution_state": "idle", + "id": 
"51ed80f1-a935-42bc-8194-832f91222c45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1007.5656309474507, 'ltr')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "execution_state": "idle", + "id": "40a98c10-59c3-498a-a9e6-c23bd9437bc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "937.8557468023619" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "execution_state": "idle", + "id": "80b22ba1-e5ba-4f1e-8038-158a2c2f37a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': tensor([[ 101, 1045, 2064, 2022, 2524, 1010, 2021, 1045, 2572, 2025,\n", + " 5024, 1012, 2054, 2572, 1045, 1029, 1037, 15117, 1012, 102,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0]]), 'labels': tensor([[ 101, 1045, 2064, 2022, 2524, 1010, 2021, 1045, 2572, 2025,\n", + " 5024, 1012, 2054, 2572, 1045, 1029, 1037, 15117, 1012, 102,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100]])}" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "execution_state": "idle", + "id": "c68b5235-a4a7-4f38-9acb-f5072e546a96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 4., 6., 11., 6., 5., 2., 1., 1., 2., 2.]),\n", + " array([ 613.56297843, 829.36555779, 1045.16813716, 1260.97071653,\n", + " 1476.77329589, 1692.57587526, 1908.37845463, 2124.18103399,\n", + " 2339.98361336, 2555.78619272, 2771.58877209]),\n", + " <BarContainer object of 10 artists>)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 
640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.hist(ppls)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "execution_state": "idle", + "id": "8acad3ce-905d-455e-af5d-9770495f374a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ppls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86daa05b-5784-457b-b65e-8b8395128d6f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Riddles_FixedPos.ipynb b/notebooks/Riddles_FixedPos.ipynb new file mode 100644 index 0000000..5e42e0b --- /dev/null +++ b/notebooks/Riddles_FixedPos.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "execution_state": "idle", + "id": "1ddfc692-bda7-4d38-a549-2fb0d40d437d", + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import os\n", + "import sys\n", + "\n", + "sys.path.append(\"..\")\n", + "\n", + "import torch\n", + "import transformers\n", + "from safetensors import safe_open\n", + "\n", + "from utils import add_attn_hooks\n", + "\n", + "# text_dir = \"rtl\"\n", + "text_dir = \"ltr\"\n", + "# tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(f\"/home/sipb/nlp-class-project/checkpoints/bert_base_{text_dir}/epoch_3_checkpt\", ignore_mismatched_sizes=True)\n", + "\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "execution_state": "idle", + "id": "eaf99031-9141-43dd-89ba-be9b8e63a1ba", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"/home/sipb/nlp-class-project/data/riddles.txt\", \"r\") as f:\n", + " riddles_qa = [line.rstrip() for line in f.readlines()]\n", + "\n", + "with open(\"/home/sipb/nlp-class-project/data/ltr_riddles.txt\", \"r\") as f:\n", + " riddles_aq = [line.rstrip() for line in f.readlines()]" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "execution_state": "idle", + "id": "94da0be0-d6ef-46be-9fff-4ebf022e4fed", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at 
/home/sipb/nlp-class-project/checkpoints/bert_6_ltr_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_6_rtl_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_11_ltr_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_11_rtl_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_19_ltr_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_19_rtl_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_35_ltr_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_35_rtl_scratch/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr_scratch/epoch_3_checkpt were not used when initializing DistilBertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/distilbert_base_rtl_scratch/epoch_3_checkpt were not used when initializing DistilBertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" + ] + } + ], + "source": [ + "results = []\n", + "path_prefixes = [f\"bert_{size}\" for size in (6, 11, 19, 35)] + [\"distilbert_base\"]\n", + "for path_prefix in path_prefixes:\n", + " for text_dir in (\"ltr\", \"rtl\"):\n", + " checkpt_dir = f\"/home/sipb/nlp-class-project/checkpoints/{path_prefix}_{text_dir}_scratch/epoch_3_checkpt\"\n", + "\n", + "# path_prefixes = [\"distilbert_base\", \"bert_base\", \"bert_large\"]\n", + "# for path_prefix in path_prefixes:\n", + "# for text_dir in (\"ltr\", \"rtl\"):\n", + " # checkpt_dir = f\"/home/sipb/nlp-class-project/checkpoints/{path_prefix}_{text_dir}/epoch_3_checkpt\"\n", + " # model = load_checkpt(f\"/home/sipb/nlp-class-project/checkpoints/{path_prefix}_{text_dir}/epoch_3_checkpt\")\n", + " # config = transformers.AutoConfig.from_pretrained(os.path.join(checkpt_dir, \"config.json\"))\n", + " # config.max_position_embeddings = 512\n", + " try:\n", + " model = 
transformers.AutoModelForMaskedLM.from_pretrained(checkpt_dir)\n", + " except:\n", + " config = transformers.AutoConfig.from_pretrained(os.path.join(checkpt_dir, \"config.json\"))\n", + " config.max_position_embeddings = 512\n", + " model = transformers.AutoModelForMaskedLM.from_pretrained(checkpt_dir, config=config)\n", + " \n", + " add_attn_hooks(model, text_dir)\n", + " model.eval();\n", + "\n", + " for dataset_type, dataset in [\n", + " (\"qa\", riddles_qa),\n", + " (\"aq\", riddles_aq),\n", + " ]:\n", + " ppls = []\n", + " for riddle in dataset:\n", + " batch = tokenizer([riddle], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].clone()\n", + " batch[\"labels\"][batch[\"attention_mask\"] == 0] = -100\n", + " with torch.inference_mode():\n", + " output = model(**batch)\n", + " ppls.append(math.e ** output.loss.item())\n", + "\n", + " results.append((sum(ppls) / len(ppls), dataset_type, text_dir, path_prefix))" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "execution_state": "idle", + "id": "bdee66ad-65ad-40c7-ac86-9a2d6b8fba02", + "metadata": {}, + "outputs": [], + "source": [ + "to_params = {\n", + " \"bert_6\": 6,\n", + " \"bert_11\": 11,\n", + " \"bert_19\": 19,\n", + " \"bert_35\": 35,\n", + " \"distilbert_base\": 67,\n", + " \"bert_base\": 110,\n", + " \"bert_large\": 335,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "execution_state": "idle", + "id": "d1668465-fe85-4310-8d88-031d4b8d361f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LTR & 6M & AQ & 2420 \\\\\n", + "RTL & 6M & AQ & 2570 \\\\\n", + "LTR & 11M & AQ & 1930 \\\\\n", + "RTL & 11M & AQ & 2710 \\\\\n", + "LTR & 19M & AQ & 2930 \\\\\n", + "RTL & 19M & AQ & 5820 \\\\\n", + "LTR & 35M & AQ & 6270 \\\\\n", + "RTL & 35M & AQ & 11600 \\\\\n", + "LTR & 67M & AQ & 9790 \\\\\n", + 
"RTL & 67M & AQ & 32500 \\\\\n", + "LTR & 6M & QA & 1960 \\\\\n", + "RTL & 6M & QA & 1770 \\\\\n", + "LTR & 11M & QA & 1630 \\\\\n", + "RTL & 11M & QA & 1710 \\\\\n", + "LTR & 19M & QA & 2610 \\\\\n", + "RTL & 19M & QA & 3330 \\\\\n", + "LTR & 35M & QA & 5080 \\\\\n", + "RTL & 35M & QA & 5410 \\\\\n", + "LTR & 67M & QA & 7160 \\\\\n", + "RTL & 67M & QA & 27600 \\\\\n" + ] + } + ], + "source": [ + "for ppl, task, text_dir, path_prefix in sorted(results, key=lambda x: (x[1], to_params[x[3]], x[2])):\n", + " ppl = int(float(f\"{ppl:.3g}\"))\n", + " print(rf\"{text_dir.upper()} & {to_params[path_prefix]}M & {task.upper()} & {ppl} \\\\\")" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "execution_state": "idle", + "id": "8894ca16-58e3-4448-bec8-c962f5135737", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the more you take, the more you leave behind. what am i? @ @ @ @ @ @ @ @ @ @ @ @ @ the @ @ @ @ ( the the the. @ the @ @ ( @ @ ( @ @ @ @ ( the.. @ ( @ ) @ the @ the the\n" + ] + } + ], + "source": [ + "# input_text = [\"The more you take, the more you leave behind. 
What am I?\"]\n", + "# batch = tokenizer(input_text, return_tensors=\"pt\", padding_side=\"right\", padding=\"max_length\", max_length=64)\n", + "# output_ids = model.generate(batch['input_ids'], max_length=128, do_sample=False) # do_sample=False ensures greedy decoding\n", + "# decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + "# print(decoded_output)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "execution_state": "idle", + "id": "a4098975-2df6-4435-bc93-1a5afd6d7e68", + "metadata": {}, + "outputs": [], + "source": [ + "# CPU is fast enough\n", + "\n", + "ppls = []\n", + "for riddle in riddles:\n", + " batch = tokenizer([riddle], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].clone()\n", + " batch[\"labels\"][batch[\"attention_mask\"] == 0] = -100\n", + " # batch = tokenizer([riddle], return_tensors=\"pt\")#, padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"longest\", max_length=128)\n", + " # batch[\"labels\"] = batch[\"input_ids\"]\n", + " with torch.inference_mode():\n", + " output = model(**batch)\n", + " ppls.append(math.e ** output.loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "execution_state": "idle", + "id": "c68b5235-a4a7-4f38-9acb-f5072e546a96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 4., 6., 11., 6., 5., 2., 1., 1., 2., 2.]),\n", + " array([ 613.56297843, 829.36555779, 1045.16813716, 1260.97071653,\n", + " 1476.77329589, 1692.57587526, 1908.37845463, 2124.18103399,\n", + " 2339.98361336, 2555.78619272, 2771.58877209]),\n", + " <BarContainer object of 10 artists>)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": 
"display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.hist(ppls)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86daa05b-5784-457b-b65e-8b8395128d6f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Riddles_FixedPos_QAChars.ipynb b/notebooks/Riddles_FixedPos_QAChars.ipynb new file mode 100644 index 0000000..0283bb2 --- /dev/null +++ b/notebooks/Riddles_FixedPos_QAChars.ipynb @@ -0,0 +1,345 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "execution_state": "idle", + "id": "1ddfc692-bda7-4d38-a549-2fb0d40d437d", + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import os\n", + "import re\n", + "import sys\n", + "\n", + "sys.path.append(\"..\")\n", + "\n", + "import torch\n", + "import transformers\n", + "from safetensors import safe_open\n", + "\n", + "from utils import add_attn_hooks\n", + "\n", + "# text_dir = \"rtl\"\n", + "text_dir = \"ltr\"\n", + "# tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(f\"/home/sipb/nlp-class-project/checkpoints/bert_base_{text_dir}/epoch_3_checkpt\", ignore_mismatched_sizes=True)\n", + "\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "execution_state": "idle", + "id": "5bd236ae-119c-4ea6-9a0f-03272f528caf", + "metadata": {}, + "outputs": [], + "source": [] + }, + 
{ + "cell_type": "code", + "execution_count": 19, + "execution_state": "idle", + "id": "eaf99031-9141-43dd-89ba-be9b8e63a1ba", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"/home/sipb/nlp-class-project/data/riddles.txt\", \"r\") as f:\n", + " riddles = [line.rstrip() for line in f.readlines()]\n", + "# with open(\"/home/sipb/nlp-class-project/data/ltr_riddles.txt\", \"r\") as f:\n", + "# riddles_aq = [line.rstrip() for line in f.readlines()]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "execution_state": "idle", + "id": "9652550f-61b9-4b9b-ad10-9d9873a9e80b", + "metadata": {}, + "outputs": [], + "source": [ + "pattern = r\"^(.*)(What am I\\?\\s*)(.+)$\"\n", + "\n", + "riddles_qa = []\n", + "riddles_aq = []\n", + "for riddle in riddles:\n", + " riddles_qa.append(re.sub(pattern, r\"Q: \\1\\2A: \\3\", riddle))\n", + " riddles_aq.append(re.sub(pattern, r\"A: \\3 Q: \\1\\2\", riddle).rstrip())" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "execution_state": "idle", + "id": "94da0be0-d6ef-46be-9fff-4ebf022e4fed", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/distilbert_base_ltr/epoch_3_checkpt were not used when initializing DistilBertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/distilbert_base_rtl/epoch_3_checkpt were not used when initializing DistilBertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing DistilBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_base_ltr/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_base_rtl/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_large_ltr/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_large_rtl/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" + ] + } + ], + "source": [ + "results = []\n", + "# path_prefixes = [f\"bert_{size}\" for size in (6, 11, 19, 35)] + [\"distilbert_base\"]\n", + "# for path_prefix in path_prefixes:\n", + "# for text_dir in (\"ltr\", \"rtl\"):\n", + "# checkpt_dir = f\"/home/sipb/nlp-class-project/checkpoints/{path_prefix}_{text_dir}_scratch/epoch_3_checkpt\"\n", + "\n", + "path_prefixes = [\"distilbert_base\", \"bert_base\", \"bert_large\"]\n", + "for path_prefix in path_prefixes:\n", + " for text_dir in (\"ltr\", \"rtl\"):\n", + " checkpt_dir = f\"/home/sipb/nlp-class-project/checkpoints/{path_prefix}_{text_dir}/epoch_3_checkpt\"\n", + " try:\n", + " model = transformers.AutoModelForMaskedLM.from_pretrained(checkpt_dir)\n", + " except:\n", + " config = transformers.AutoConfig.from_pretrained(os.path.join(checkpt_dir, \"config.json\"))\n", + " config.max_position_embeddings = 512\n", + " model = transformers.AutoModelForMaskedLM.from_pretrained(checkpt_dir, config=config)\n", + " \n", + " add_attn_hooks(model, text_dir)\n", + " model.eval();\n", + "\n", + " for dataset_type, dataset in [\n", + " (\"qa\", riddles_qa),\n", + " (\"aq\", riddles_aq),\n", + " ]:\n", + " ppls = []\n", + " for riddle in dataset:\n", + " batch = tokenizer([riddle], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].clone()\n", + " batch[\"labels\"][batch[\"attention_mask\"] == 0] = -100\n", + " with torch.inference_mode():\n", + " output = model(**batch)\n", + " ppls.append(math.e ** output.loss.item())\n", + "\n", + " results.append((sum(ppls) / len(ppls), 
dataset_type, text_dir, path_prefix))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "execution_state": "idle", + "id": "bdee66ad-65ad-40c7-ac86-9a2d6b8fba02", + "metadata": {}, + "outputs": [], + "source": [ + "to_params = {\n", + " \"bert_6\": 6,\n", + " \"bert_11\": 11,\n", + " \"bert_19\": 19,\n", + " \"bert_35\": 35,\n", + " \"distilbert_base\": 67,\n", + " \"bert_base\": 110,\n", + " \"bert_large\": 335,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "execution_state": "idle", + "id": "d1668465-fe85-4310-8d88-031d4b8d361f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LTR & 6M & AQ & 1980 \\\\\n", + "RTL & 6M & AQ & 1440 \\\\\n", + "LTR & 11M & AQ & 1600 \\\\\n", + "RTL & 11M & AQ & 1490 \\\\\n", + "LTR & 19M & AQ & 2310 \\\\\n", + "RTL & 19M & AQ & 2740 \\\\\n", + "LTR & 35M & AQ & 3650 \\\\\n", + "RTL & 35M & AQ & 4090 \\\\\n", + "LTR & 67M & AQ & 6360 \\\\\n", + "RTL & 67M & AQ & 22900 \\\\\n", + "LTR & 6M & QA & 1790 \\\\\n", + "RTL & 6M & QA & 1850 \\\\\n", + "LTR & 11M & QA & 1430 \\\\\n", + "RTL & 11M & QA & 1820 \\\\\n", + "LTR & 19M & QA & 2280 \\\\\n", + "RTL & 19M & QA & 3740 \\\\\n", + "LTR & 35M & QA & 3690 \\\\\n", + "RTL & 35M & QA & 4650 \\\\\n", + "LTR & 67M & QA & 6340 \\\\\n", + "RTL & 67M & QA & 22900 \\\\\n" + ] + } + ], + "source": [ + "for ppl, task, text_dir, path_prefix in sorted(results, key=lambda x: (x[1], to_params[x[3]], x[2])):\n", + " ppl = int(float(f\"{ppl:.3g}\"))\n", + " print(rf\"{text_dir.upper()} & {to_params[path_prefix]}M & {task.upper()} & {ppl} \\\\\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "execution_state": "idle", + "id": "e9ac0af8-2638-4076-b0cc-9ec9355c2c01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LTR & 67M & AQ & 969 \\\\\n", + "RTL & 67M & AQ & 675 \\\\\n", + "LTR & 110M & AQ & 1880 \\\\\n", + "RTL & 110M & AQ & 483 \\\\\n", + "LTR 
& 335M & AQ & 2610 \\\\\n", + "RTL & 335M & AQ & 695 \\\\\n", + "LTR & 67M & QA & 952 \\\\\n", + "RTL & 67M & QA & 781 \\\\\n", + "LTR & 110M & QA & 2020 \\\\\n", + "RTL & 110M & QA & 689 \\\\\n", + "LTR & 335M & QA & 2950 \\\\\n", + "RTL & 335M & QA & 782 \\\\\n" + ] + } + ], + "source": [ + "for ppl, task, text_dir, path_prefix in sorted(results, key=lambda x: (x[1], to_params[x[3]], x[2])):\n", + " ppl = int(float(f\"{ppl:.3g}\"))\n", + " print(rf\"{text_dir.upper()} & {to_params[path_prefix]}M & {task.upper()} & {ppl} \\\\\")" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "execution_state": "idle", + "id": "8894ca16-58e3-4448-bec8-c962f5135737", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the more you take, the more you leave behind. what am i? @ @ @ @ @ @ @ @ @ @ @ @ @ the @ @ @ @ ( the the the. @ the @ @ ( @ @ ( @ @ @ @ ( the.. @ ( @ ) @ the @ the the\n" + ] + } + ], + "source": [ + "# input_text = [\"The more you take, the more you leave behind. 
What am I?\"]\n", + "# batch = tokenizer(input_text, return_tensors=\"pt\", padding_side=\"right\", padding=\"max_length\", max_length=64)\n", + "# output_ids = model.generate(batch['input_ids'], max_length=128, do_sample=False) # do_sample=False ensures greedy decoding\n", + "# decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + "# print(decoded_output)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "execution_state": "idle", + "id": "a4098975-2df6-4435-bc93-1a5afd6d7e68", + "metadata": {}, + "outputs": [], + "source": [ + "# CPU is fast enough\n", + "\n", + "ppls = []\n", + "for riddle in riddles:\n", + " batch = tokenizer([riddle], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].clone()\n", + " batch[\"labels\"][batch[\"attention_mask\"] == 0] = -100\n", + " # batch = tokenizer([riddle], return_tensors=\"pt\")#, padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"longest\", max_length=128)\n", + " # batch[\"labels\"] = batch[\"input_ids\"]\n", + " with torch.inference_mode():\n", + " output = model(**batch)\n", + " ppls.append(math.e ** output.loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "execution_state": "idle", + "id": "c68b5235-a4a7-4f38-9acb-f5072e546a96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 4., 6., 11., 6., 5., 2., 1., 1., 2., 2.]),\n", + " array([ 613.56297843, 829.36555779, 1045.16813716, 1260.97071653,\n", + " 1476.77329589, 1692.57587526, 1908.37845463, 2124.18103399,\n", + " 2339.98361336, 2555.78619272, 2771.58877209]),\n", + " <BarContainer object of 10 artists>)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": 
"display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.hist(ppls)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86daa05b-5784-457b-b65e-8b8395128d6f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Right_to_Left_NLP.ipynb b/notebooks/Right_to_Left_NLP.ipynb index bcbc57a..986ef22 100644 --- a/notebooks/Right_to_Left_NLP.ipynb +++ b/notebooks/Right_to_Left_NLP.ipynb @@ -1,373 +1,383 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "M29-oTOBIiMr" + }, + "outputs": [], + "source": [ + "%pip install datasets torch transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "id": "2iJJyERxHWSO", + "outputId": "04e9bc9d-5ee9-48d5-f370-6fd66ec7b7c1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpu\n" + ] } + ], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import transformers\n", + "from datasets import load_dataset\n", + "\n", + "transformers.set_seed(42)\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "print(device)" + ] }, - "cells": [ - { - "cell_type": "code", - "source": [ - "%pip install datasets torch transformers" - ], - 
"metadata": { - "collapsed": true, - "id": "M29-oTOBIiMr" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2iJJyERxHWSO", - "outputId": "04e9bc9d-5ee9-48d5-f370-6fd66ec7b7c1" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "cpu\n" - ] - } - ], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import transformers\n", - "from datasets import load_dataset\n", - "\n", - "transformers.set_seed(42)\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", - "print(device)" - ] - }, - { - "cell_type": "code", - "source": [ - "model_name_or_path = \"bert-base-uncased\"\n", - "model = transformers.AutoModelForMaskedLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16, attn_implementation=\"sdpa\")\n", - "model.eval()\n", - "\n", - "tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "C5PdaHGWHuXG", - "outputId": "d15272a5-1ce1-4c7e-9004-fc686a3de6b9" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n", - "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 
'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']\n", - "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", - "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", - " warnings.warn(\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "def ltr_mask(seq_len: int) -> torch.Tensor:\n", - " mask = torch.ones((seq_len, seq_len), dtype=bool)\n", - " return torch.tril(mask, diagonal=-1)\n", - "\n", - "def rtl_mask(seq_len: int) -> torch.Tensor:\n", - " return ltr_mask(seq_len).T" - ], - "metadata": { - "id": "H_AUjBRoJHXU" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "model.register_buffer(\"attn_mask\", rtl_mask(model.config.max_position_embeddings).to(model.device))\n", - "\n", - "def attn_hook(attn_module: nn.Module, args: tuple, kwargs: dict):\n", - " \"\"\"\n", - " Assuming https://github.com/huggingface/transformers/blob/33868a057c02f0368ba63bd1edb746be38fe3d90/src/transformers/models/bert/modeling_bert.py#L515\n", - " so no `kwargs` and `attention_mask` is second positional arg.\n", - "\n", - " Uses global `model.attn_mask` to save memory.\n", - " \"\"\"\n", - " assert not kwargs\n", - "\n", - " args = list(args)\n", - " assert 
args[1].size()[-2:] == model.attn_mask.size(), f\"{args[1].size()=} {model.attn_mask.size()=}\"\n", - " args[1] = model.attn_mask\n", - " return tuple(args), kwargs\n", - "\n", - "def debug_inputs_hook(attn_module: nn.Module, args: tuple, output):\n", - " print(f\"Post-forward checks\")\n", - " assert torch.equal(args[1], model.attn_mask), (args[1], model.attn_mask)" - ], - "metadata": { - "id": "Oy27MZcLLLsD" - }, - "execution_count": null, - "outputs": [] + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "C5PdaHGWHuXG", + "outputId": "d15272a5-1ce1-4c7e-9004-fc686a3de6b9" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# attn_mask = rtl_mask(model.config.max_position_embeddings)\n", - "for name, module in model.named_modules():\n", - " if isinstance(module, transformers.models.bert.modeling_bert.BertSelfAttention):\n", - " module._forward_hooks.clear() # running multiple times right now during testing\n", - " module.register_forward_pre_hook(attn_hook, with_kwargs=True)\n", - " module.register_forward_hook(debug_inputs_hook)\n", - " # module.register_buffer(\"attn_mask\", attn_mask)\n", - "\n", - "model = model.to(device)" - ], - "metadata": { - "id": "anEdwKj_OWWy" - }, - "execution_count": null, - "outputs": [] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n", + "Some weights of the model checkpoint at 
bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model_name_or_path = \"bert-base-uncased\"\n", + "model = transformers.AutoModelForMaskedLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16, attn_implementation=\"sdpa\")\n", + "model.eval()\n", + "\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H_AUjBRoJHXU" + }, + "outputs": [], + "source": [ + "def ltr_mask(seq_len: int) -> torch.Tensor:\n", + " mask = torch.ones((seq_len, seq_len), dtype=bool)\n", + " return torch.tril(mask, diagonal=-1)\n", + "\n", + "def rtl_mask(seq_len: int) -> torch.Tensor:\n", + " return ltr_mask(seq_len).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Oy27MZcLLLsD" + }, + "outputs": [], + "source": [ + "model.register_buffer(\"attn_mask\", 
rtl_mask(model.config.max_position_embeddings).to(model.device))\n", + "\n", + "def attn_hook(attn_module: nn.Module, args: tuple, kwargs: dict):\n", + " \"\"\"\n", + " Assuming https://github.com/huggingface/transformers/blob/33868a057c02f0368ba63bd1edb746be38fe3d90/src/transformers/models/bert/modeling_bert.py#L515\n", + " so no `kwargs` and `attention_mask` is second positional arg.\n", + "\n", + " Uses global `model.attn_mask` to save memory.\n", + " \"\"\"\n", + " assert not kwargs\n", + "\n", + " args = list(args)\n", + " assert args[1].size()[-2:] == model.attn_mask.size(), f\"{args[1].size()=} {model.attn_mask.size()=}\"\n", + " args[1] = model.attn_mask\n", + " return tuple(args), kwargs\n", + "\n", + "def debug_inputs_hook(attn_module: nn.Module, args: tuple, output):\n", + " print(f\"Post-forward checks\")\n", + " assert torch.equal(args[1], model.attn_mask), (args[1], model.attn_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "anEdwKj_OWWy" + }, + "outputs": [], + "source": [ + "# attn_mask = rtl_mask(model.config.max_position_embeddings)\n", + "for name, module in model.named_modules():\n", + " if isinstance(module, transformers.models.bert.modeling_bert.BertSelfAttention):\n", + " module._forward_hooks.clear() # running multiple times right now during testing\n", + " module.register_forward_pre_hook(attn_hook, with_kwargs=True)\n", + " module.register_forward_hook(debug_inputs_hook)\n", + " # module.register_buffer(\"attn_mask\", attn_mask)\n", + "\n", + "model = model.to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P1BEQFsLIRfX" + }, + "outputs": [], + "source": [ + "ds = load_dataset(\"Salesforce/wikitext\", \"wikitext-103-v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "BHE26Mr2NXhH", + "outputId": "24569931-61d7-4752-8b08-4daef58f9798" + }, + 
"outputs": [ { - "cell_type": "code", - "source": [ - "ds = load_dataset(\"Salesforce/wikitext\", \"wikitext-103-v1\")" - ], - "metadata": { - "id": "P1BEQFsLIRfX" - }, - "execution_count": null, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n" + ] }, { - "cell_type": "code", - "source": [ - "train_ds = ds[\"train\"]\n", - "inputs = tokenizer(train_ds[5][\"text\"], return_tensors=\"pt\", padding='max_length', truncation=True)\n", - "inputs = {key: val.to(device) for key, val in inputs.items()}\n", - "\n", - "with torch.no_grad():\n", - " outputs = model(**inputs)\n", - "\n", - "outputs.logits" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BHE26Mr2NXhH", - "outputId": "24569931-61d7-4752-8b08-4daef58f9798" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "tensor([[[-5.6250, -5.5938, -5.5938, ..., -5.4688, -4.9688, -2.4844],\n", - " [-9.2500, -8.9375, -9.3750, ..., -8.5000, -7.5000, -4.0312],\n", - " [-4.9062, -4.8750, -5.2812, ..., -5.0625, -4.4375, -1.8281],\n", - " ...,\n", - " [-5.5938, -5.7500, -5.7812, ..., -6.1562, -3.9688, -2.2812],\n", - " [-4.7188, -4.8750, -4.8750, ..., -5.0625, -3.4531, -2.4375],\n", - " [-4.1875, 
-3.9375, -3.9062, ..., -3.3438, -3.2344, -3.2031]]],\n", - " device='cuda:0', dtype=torch.bfloat16)" - ] - }, - "metadata": {}, - "execution_count": 9 - } + "data": { + "text/plain": [ + "tensor([[[-5.6250, -5.5938, -5.5938, ..., -5.4688, -4.9688, -2.4844],\n", + " [-9.2500, -8.9375, -9.3750, ..., -8.5000, -7.5000, -4.0312],\n", + " [-4.9062, -4.8750, -5.2812, ..., -5.0625, -4.4375, -1.8281],\n", + " ...,\n", + " [-5.5938, -5.7500, -5.7812, ..., -6.1562, -3.9688, -2.2812],\n", + " [-4.7188, -4.8750, -4.8750, ..., -5.0625, -3.4531, -2.4375],\n", + " [-4.1875, -3.9375, -3.9062, ..., -3.3438, -3.2344, -3.2031]]],\n", + " device='cuda:0', dtype=torch.bfloat16)" ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_ds = ds[\"train\"]\n", + "inputs = tokenizer(train_ds[5][\"text\"], return_tensors=\"pt\", padding='max_length', truncation=True)\n", + "inputs = {key: val.to(device) for key, val in inputs.items()}\n", + "\n", + "with torch.no_grad():\n", + " outputs = model(**inputs)\n", + "\n", + "outputs.logits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ZtEm7eQQNi4e", + "outputId": "c0eb3925-6d48-480e-a853-5057f35dbcd2" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "with torch.inference_mode():\n", - " model.register_buffer(\"attn_mask\", ltr_mask(model.config.max_position_embeddings).to(model.device))\n", - " outputs = model(**inputs)\n", - "\n", - "outputs.logits" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZtEm7eQQNi4e", - "outputId": "c0eb3925-6d48-480e-a853-5057f35dbcd2" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - 
"Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "tensor([[[-7.9062, -7.7812, -7.9062, ..., -7.1250, -7.8438, -4.8438],\n", - " [-7.1562, -7.1250, -7.2812, ..., -7.3750, -7.3750, -7.2500],\n", - " [-5.4062, -5.2188, -5.4375, ..., -5.3438, -4.3750, -5.0312],\n", - " ...,\n", - " [ 3.9844, 3.6406, 3.6406, ..., 3.8281, 2.9062, 5.2812],\n", - " [ 4.0938, 3.7812, 3.8281, ..., 4.0000, 2.9844, 5.5000],\n", - " [ 3.8281, 3.5312, 3.5156, ..., 4.1562, 2.8438, 4.7188]]],\n", - " device='cuda:0', dtype=torch.bfloat16)" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n" + ] }, { - "cell_type": "code", - "source": [ - "with torch.inference_mode():\n", - " model.register_buffer(\"attn_mask\", rtl_mask(model.config.max_position_embeddings).to(model.device))\n", - " outputs = model(**inputs)\n", - "\n", - "outputs.logits" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nz0j7V3oNkZu", - "outputId": "939b1d6d-5dca-41ef-eb17-9e0f4d09629e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n", - "Post-forward checks\n" - ] - }, - { 
- "output_type": "execute_result", - "data": { - "text/plain": [ - "tensor([[[-5.6250, -5.5938, -5.5938, ..., -5.4688, -4.9688, -2.4844],\n", - " [-9.2500, -8.9375, -9.3750, ..., -8.5000, -7.5000, -4.0312],\n", - " [-4.9062, -4.8750, -5.2812, ..., -5.0625, -4.4375, -1.8281],\n", - " ...,\n", - " [-5.5938, -5.7500, -5.7812, ..., -6.1562, -3.9688, -2.2812],\n", - " [-4.7188, -4.8750, -4.8750, ..., -5.0625, -3.4531, -2.4375],\n", - " [-4.1875, -3.9375, -3.9062, ..., -3.3438, -3.2344, -3.2031]]],\n", - " device='cuda:0', dtype=torch.bfloat16)" - ] - }, - "metadata": {}, - "execution_count": 11 - } + "data": { + "text/plain": [ + "tensor([[[-7.9062, -7.7812, -7.9062, ..., -7.1250, -7.8438, -4.8438],\n", + " [-7.1562, -7.1250, -7.2812, ..., -7.3750, -7.3750, -7.2500],\n", + " [-5.4062, -5.2188, -5.4375, ..., -5.3438, -4.3750, -5.0312],\n", + " ...,\n", + " [ 3.9844, 3.6406, 3.6406, ..., 3.8281, 2.9062, 5.2812],\n", + " [ 4.0938, 3.7812, 3.8281, ..., 4.0000, 2.9844, 5.5000],\n", + " [ 3.8281, 3.5312, 3.5156, ..., 4.1562, 2.8438, 4.7188]]],\n", + " device='cuda:0', dtype=torch.bfloat16)" ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with torch.inference_mode():\n", + " model.register_buffer(\"attn_mask\", ltr_mask(model.config.max_position_embeddings).to(model.device))\n", + " outputs = model(**inputs)\n", + "\n", + "outputs.logits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "nz0j7V3oNkZu", + "outputId": "939b1d6d-5dca-41ef-eb17-9e0f4d09629e" + }, + "outputs": [ { - "cell_type": "code", - "source": [], - "metadata": { - "id": "82PpSWnrdMgu" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Wikipedia test\n", - "from datasets import load_dataset\n", - "\n", - "ds = load_dataset(\"wikimedia/wikipedia\", \"20231101.en\")\n", - "print(ds[\"train\"][1000])" - ], - 
"metadata": { - "id": "DHftDnPKdMjV" - }, - "execution_count": null, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n", + "Post-forward checks\n" + ] }, { - "cell_type": "code", - "source": [], - "metadata": { - "id": "FTMkfLyKdMqu" - }, - "execution_count": null, - "outputs": [] + "data": { + "text/plain": [ + "tensor([[[-5.6250, -5.5938, -5.5938, ..., -5.4688, -4.9688, -2.4844],\n", + " [-9.2500, -8.9375, -9.3750, ..., -8.5000, -7.5000, -4.0312],\n", + " [-4.9062, -4.8750, -5.2812, ..., -5.0625, -4.4375, -1.8281],\n", + " ...,\n", + " [-5.5938, -5.7500, -5.7812, ..., -6.1562, -3.9688, -2.2812],\n", + " [-4.7188, -4.8750, -4.8750, ..., -5.0625, -3.4531, -2.4375],\n", + " [-4.1875, -3.9375, -3.9062, ..., -3.3438, -3.2344, -3.2031]]],\n", + " device='cuda:0', dtype=torch.bfloat16)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } - ] -}
\ No newline at end of file + ], + "source": [ + "with torch.inference_mode():\n", + " model.register_buffer(\"attn_mask\", rtl_mask(model.config.max_position_embeddings).to(model.device))\n", + " outputs = model(**inputs)\n", + "\n", + "outputs.logits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "82PpSWnrdMgu" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DHftDnPKdMjV" + }, + "outputs": [], + "source": [ + "# Wikipedia test\n", + "from datasets import load_dataset\n", + "\n", + "ds = load_dataset(\"wikimedia/wikipedia\", \"20231101.en\")\n", + "print(ds[\"train\"][1000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FTMkfLyKdMqu" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Stat_Tests.ipynb b/notebooks/Stat_Tests.ipynb new file mode 100644 index 0000000..379df3c --- /dev/null +++ b/notebooks/Stat_Tests.ipynb @@ -0,0 +1,487 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "execution_state": "idle", + "id": "d3616030-9841-4eeb-a1c0-b4fa591e2fe1", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import scipy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "execution_state": "idle", + "id": "e0ecfabf-f3ad-4ac3-a8de-97e71f9aff5a", + "metadata": {}, + "outputs": [], + "source": [ + "small_6M_ppls = [\n", + " (116.7, 114.9), # (LTR ppl, RTL ppl)\n", + " (117.4, 
114.4),\n", + " (116.7, 115.0),\n", + " (117.4, 115.4),\n", + " (117.5, 113.8),\n", + " (116.1, 114.0)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "execution_state": "idle", + "id": "bacb5848-af91-4443-9d71-81b6cb0e6aa6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TtestResult(statistic=6.996032521277681, pvalue=1.9799032618443016e-05, df=9.885322295882748)\n", + "TtestResult(statistic=7.357072921297962, pvalue=0.0003642557582314903, df=5)\n" + ] + } + ], + "source": [ + "# One-sided unpaired Welch t-test\n", + "small_ltr_ppls, small_rtl_ppls = np.array(small_6M_ppls).T\n", + "print(scipy.stats.ttest_ind(small_ltr_ppls, small_rtl_ppls, equal_var=False, alternative=\"greater\"))\n", + "# Paired t-test\n", + "print(scipy.stats.ttest_rel(small_ltr_ppls, small_rtl_ppls, alternative=\"greater\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "execution_state": "idle", + "id": "c23d0ecf-c0f0-4a61-ac99-79c9c91c1d76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0010822510822510823" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Permutation test\n", + "def statistic(x, y):\n", + " return np.mean(x) - np.mean(y)\n", + "\n", + "scipy.stats.permutation_test((small_ltr_ppls, small_rtl_ppls), statistic, n_resamples=np.inf, alternative=\"greater\").pvalue" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "execution_state": "idle", + "id": "d2bc01d6-821e-4a31-acd2-9d54592ab095", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0010822510822510823" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "execution_state": "idle", + "id": "d59a5f23-7306-478b-87cc-4a064675c48d", + "metadata": {}, + "outputs": [], + "source": [ + "small_6M_losses 
= [\n", + " (4.761364663504469, 4.744475745069383),\n", + " (4.76577, 4.73966), # (LTR loss, RTL loss)\n", + " (4.7599, 4.74497),\n", + " (4.76553, 4.74848),\n", + " (4.76638, 4.73508),\n", + " (4.75452, 4.73628)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "execution_state": "idle", + "id": "ab613d50-98a3-409d-92c2-123943a10c39", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TtestResult(statistic=7.221951304385972, pvalue=1.5856791940777708e-05, df=9.802083740534792)\n", + "TtestResult(statistic=7.866415444091634, pvalue=0.00026667826712639355, df=5)\n", + "0.0010822510822510823\n" + ] + } + ], + "source": [ + "small_ltr_losses, small_rtl_losses = np.array(small_6M_losses).T\n", + "print(scipy.stats.ttest_ind(small_ltr_losses, small_rtl_losses, equal_var=False, alternative=\"greater\"))\n", + "print(scipy.stats.ttest_rel(small_ltr_losses, small_rtl_losses, alternative=\"greater\"))\n", + "print(scipy.stats.permutation_test((small_ltr_losses, small_rtl_losses), statistic, n_resamples=np.inf, alternative=\"greater\").pvalue)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83aec592-de6e-43b9-9420-4b211641b75f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 23, + "execution_state": "idle", + "id": "ea396600-66d9-45b4-b36c-4099c5548dec", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "execution_state": "idle", + "id": "c46997aa-65b5-4c49-8fa8-5c60d33362f5", + "metadata": {}, + "outputs": [], + "source": [ + "csv_filename = '../data/wandb_export_2024-12-04T19_56_43.325-05_00.csv'\n", + "df = pd.read_csv(csv_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "execution_state": "idle", + "id": "7fcd8146-99cc-4322-8338-b58e58b36a30", + "metadata": {}, + "outputs": 
[], + "source": [ + "df = df.drop([0, 1, 10, 11, 12, 15,16,17,18,19])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "execution_state": "idle", + "id": "ae6a65ee-1300-4d79-b268-c5ebae8a3a99", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"val_ppl\"] = np.e ** df[\"val_loss\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "execution_state": "idle", + "id": "e1ebf94e-d645-4309-addc-aece1cc703ac", + "metadata": {}, + "outputs": [], + "source": [ + "transferred_ppl = np.array([24.4, 24.4, 21.9, 21.8, 18.1, 17.7]) # (rtl, ltr, rtl, ltr, ...)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "execution_state": "idle", + "id": "79f2b427-d430-4aa4-9cef-8dc4d3d9b00b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<matplotlib.legend.Legend at 0x7f40a395bc20>" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Cringe hardcoding\n", + "plt.plot([6, 11, 19, 35, 67], np.array(df[\"val_ppl\"])[1::2] - np.array(df[\"val_ppl\"])[::2], linewidth=2, label=\"Scratch\")\n", + "plt.plot([67, 110, 335], transferred_ppl[1::2] - transferred_ppl[::2], linewidth=2, label=\"Transferred\")\n", + "plt.axhline(y=0, color='gray', linestyle='--')\n", + "plt.xlabel(\"Parameter Count (in millions)\")\n", + "plt.ylabel(\"RTL PPL - LTR PPL\")\n", + "plt.legend()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "execution_state": "idle", + "id": "66ef4460-3a03-4017-845a-9ff04733985e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<matplotlib.legend.Legend at 0x7f40a33cdf40>" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + 
"metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Cringe hardcoding\n", + "plt.plot([6, 11, 19, 35, 67], np.array(df[\"val_loss\"])[1::2] - np.array(df[\"val_loss\"])[::2], linewidth=2, marker=\"o\", label=\"Scratch\")\n", + "plt.plot([67, 110, 335], np.log(transferred_ppl[1::2]) - np.log(transferred_ppl[::2]), linewidth=2, marker=\"o\", label=\"Transferred\")\n", + "plt.axhline(y=0, color='gray', linestyle='--')\n", + "plt.xlabel(\"Parameter Count (in millions)\")\n", + "plt.ylabel(\"RTL Loss - LTR Loss\")\n", + "plt.title(\"RTL and LTR Loss Difference Across Model Size\")\n", + "plt.legend()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "execution_state": "idle", + "id": "b492b7f4-7e93-444b-b5ba-1bdb5b6403f2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>val_loss</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>distilbert_base_japan_rtl</td>\n", + " <td>2.832614</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>distilbert_base_japan_ltr</td>\n", + " <td>2.823765</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>bert_6M_rtl_scratch</td>\n", + " <td>4.744476</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>bert_6_ltr_scratch</td>\n", + " <td>4.761365</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>bert_11_rtl_scratch</td>\n", + " <td>4.446950</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>bert_11_ltr_scratch</td>\n", + " <td>4.462379</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>bert_19_rtl_scratch</td>\n", + " <td>4.177320</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>bert_19_ltr_scratch</td>\n", + " <td>4.186271</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>bert_35_rtl_scratch</td>\n", + " <td>3.927857</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>bert_35_ltr_scratch</td>\n", + " <td>3.941595</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>qa_distilbert_base_ltr_v2</td>\n", + " <td>3.150267</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>qa_distilbert_base_rtl_v2</td>\n", + " <td>3.190452</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>qa_ltr_distilbert_base</td>\n", + " <td>3.325950</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>distilbert_base_ltr_scratch</td>\n", + " <td>3.686307</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>distilbert_base_rtl_scratch</td>\n", + " <td>3.688566</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>deep-monkey-11</td>\n", + " <td>3.009245</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>distilbert_base_ltr_4epoch</td>\n", + " <td>3.196100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>distilbert_base_rtl_4epoch</td>\n", + " <td>3.193662</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>bert_base_ltr_4epoch</td>\n", + " <td>3.082236</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>bert_base_rtl_4epoch</td>\n", + " <td>3.088110</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name val_loss\n", + "0 distilbert_base_japan_rtl 2.832614\n", + "1 distilbert_base_japan_ltr 2.823765\n", + "2 bert_6M_rtl_scratch 4.744476\n", + "3 bert_6_ltr_scratch 4.761365\n", + "4 bert_11_rtl_scratch 4.446950\n", + "5 bert_11_ltr_scratch 4.462379\n", + "6 bert_19_rtl_scratch 4.177320\n", + "7 bert_19_ltr_scratch 4.186271\n", + "8 
bert_35_rtl_scratch 3.927857\n", + "9 bert_35_ltr_scratch 3.941595\n", + "10 qa_distilbert_base_ltr_v2 3.150267\n", + "11 qa_distilbert_base_rtl_v2 3.190452\n", + "12 qa_ltr_distilbert_base 3.325950\n", + "13 distilbert_base_ltr_scratch 3.686307\n", + "14 distilbert_base_rtl_scratch 3.688566\n", + "15 deep-monkey-11 3.009245\n", + "16 distilbert_base_ltr_4epoch 3.196100\n", + "17 distilbert_base_rtl_4epoch 3.193662\n", + "18 bert_base_ltr_4epoch 3.082236\n", + "19 bert_base_rtl_4epoch 3.088110" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " pd.read_csv(csv_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fda6f102-904a-43e1-b078-258d39f1738f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/addition.ipynb b/notebooks/addition.ipynb new file mode 100644 index 0000000..01fddff --- /dev/null +++ b/notebooks/addition.ipynb @@ -0,0 +1,836 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "execution_state": "idle", + "id": "ecaeb29e-fbbe-4876-86ad-9fbadea989b0", + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "import torch\n", + "import torch.nn as nn\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "assert device.type == \"cuda\", \"CUDA is not available. 
Please check your GPU setup.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "execution_state": "idle", + "id": "84a82827-8947-4a26-a485-56f5b1eadb4c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor([[4, 9, 4, 1, 8, 2],\n", + " [0, 6, 5, 9, 1, 4],\n", + " [4, 9, 1, 0, 5, 5],\n", + " [5, 2, 4, 9, 1, 8],\n", + " [2, 7, 6, 8, 0, 1]], device='cuda:0'),\n", + " tensor([[5, 7, 7],\n", + " [9, 7, 9],\n", + " [4, 4, 7],\n", + " [4, 4, 2],\n", + " [0, 8, 7]], device='cuda:0'))" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "NUM_LEN = 3\n", + "\n", + "def pad(a):\n", + " s = str(a)\n", + " if len(s) > NUM_LEN:\n", + " return s[-NUM_LEN:]\n", + " return s.zfill(NUM_LEN)\n", + "\n", + "def mkbatch_ltr(size):\n", + " data = []\n", + " labels = []\n", + " for i in range(size):\n", + " a = random.randrange(0, 10**NUM_LEN)\n", + " b = random.randrange(0, 10**NUM_LEN)\n", + " c = a + b\n", + " data.append(list(map(int, pad(a) + pad(b))))\n", + " labels.append(list(map(int, pad(c))))\n", + " return torch.tensor(data, device=device), torch.tensor(labels, device=device)\n", + "\n", + "def mkbatch_rtl(size):\n", + " data, labels = mkbatch_ltr(size)\n", + " return torch.flip(data, (1,)), torch.flip(labels, (1,))\n", + "\n", + "mkbatch_rtl(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "execution_state": "idle", + "id": "d50dce44-57b7-4d4d-895a-c2275c04234c", + "metadata": {}, + "outputs": [], + "source": [ + "class TransformerModel(nn.Module):\n", + " def __init__(self, input_dim, model_dim, output_dim, nheads, nenclayers, ndeclayers):\n", + " super().__init__()\n", + " self.emb = nn.Embedding(input_dim, model_dim - 1)\n", + " self.trans = nn.Transformer(d_model=model_dim, nhead=nheads, dim_feedforward=4 * model_dim,\n", + " num_encoder_layers=nenclayers, num_decoder_layers=ndeclayers,\n", + " dropout=0, batch_first=True)\n", + " self.output = 
nn.Linear(model_dim, output_dim)\n", + "\n", + " def forward(self, data, labels):\n", + " bsz = data.size(0)\n", + " data_pos = (torch.arange(2 * NUM_LEN, device=device) % NUM_LEN).expand(bsz, -1)\n", + " labels_pos = (torch.arange(NUM_LEN, device=device)).expand(bsz, -1)\n", + " data_emb = torch.cat((self.emb(data), data_pos.unsqueeze(2)), 2)\n", + " labels_emb = torch.cat((self.emb(labels), labels_pos.unsqueeze(2)), 2)\n", + " return self.output(self.trans(data_emb, labels_emb, tgt_mask=TGT_MASK, tgt_is_causal=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "execution_state": "idle", + "id": "ddad4059-b06e-4eb3-a55a-5a4a842cdd7a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data: 32768K\n", + "Trainable parameters in the model: 1251\n" + ] + } + ], + "source": [ + "MODEL_DIM = 4 # Dimension of model\n", + "VOCAB_SIZE = 10\n", + "NEPOCHS = 1000\n", + "BSZ = 2**15 # Batch size\n", + "NHEADS = 1\n", + "NENCLAYERS = 2\n", + "NDECLAYERS = 2\n", + "\n", + "LR = 1e-2\n", + "\n", + "TGT_MASK = nn.Transformer.generate_square_subsequent_mask(NUM_LEN)\n", + "model = TransformerModel(VOCAB_SIZE + 1, MODEL_DIM, VOCAB_SIZE, NHEADS, NENCLAYERS, NDECLAYERS).to(device)\n", + "\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=LR)\n", + "\n", + "train_err = []\n", + "open('loss', 'w').close()\n", + "\n", + "trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n", + "print(f\"Training data: {NEPOCHS*BSZ//10**3}K\")\n", + "print(f\"Trainable parameters in the model: {trainable_params}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "execution_state": "idle", + "id": "689f2e44-da84-43ea-b539-414d6f5c37e3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0/1000 \t Train Err: 2.4793\n", + "Epoch 1/1000 \t Train Err: 2.4310\n", + "Epoch 2/1000 \t 
Train Err: 2.3800\n", + "Epoch 3/1000 \t Train Err: 2.3493\n", + "Epoch 4/1000 \t Train Err: 2.3288\n", + "Epoch 5/1000 \t Train Err: 2.3202\n", + "Epoch 6/1000 \t Train Err: 2.3171\n", + "Epoch 7/1000 \t Train Err: 2.3139\n", + "Epoch 8/1000 \t Train Err: 2.3095\n", + "Epoch 9/1000 \t Train Err: 2.3064\n", + "Epoch 10/1000 \t Train Err: 2.3040\n", + "Epoch 11/1000 \t Train Err: 2.3029\n", + "Epoch 12/1000 \t Train Err: 2.3030\n", + "Epoch 13/1000 \t Train Err: 2.3037\n", + "Epoch 14/1000 \t Train Err: 2.3047\n", + "Epoch 15/1000 \t Train Err: 2.3060\n", + "Epoch 16/1000 \t Train Err: 2.3067\n", + "Epoch 17/1000 \t Train Err: 2.3067\n", + "Epoch 18/1000 \t Train Err: 2.3068\n", + "Epoch 19/1000 \t Train Err: 2.3059\n", + "Epoch 20/1000 \t Train Err: 2.3060\n", + "Epoch 21/1000 \t Train Err: 2.3052\n", + "Epoch 22/1000 \t Train Err: 2.3044\n", + "Epoch 23/1000 \t Train Err: 2.3039\n", + "Epoch 24/1000 \t Train Err: 2.3039\n", + "Epoch 25/1000 \t Train Err: 2.3033\n", + "Epoch 26/1000 \t Train Err: 2.3032\n", + "Epoch 27/1000 \t Train Err: 2.3032\n", + "Epoch 28/1000 \t Train Err: 2.3032\n", + "Epoch 29/1000 \t Train Err: 2.3029\n", + "Epoch 30/1000 \t Train Err: 2.3028\n", + "Epoch 31/1000 \t Train Err: 2.3032\n", + "Epoch 32/1000 \t Train Err: 2.3031\n", + "Epoch 33/1000 \t Train Err: 2.3030\n", + "Epoch 34/1000 \t Train Err: 2.3031\n", + "Epoch 35/1000 \t Train Err: 2.3031\n", + "Epoch 36/1000 \t Train Err: 2.3031\n", + "Epoch 37/1000 \t Train Err: 2.3029\n", + "Epoch 38/1000 \t Train Err: 2.3033\n", + "Epoch 39/1000 \t Train Err: 2.3032\n", + "Epoch 40/1000 \t Train Err: 2.3031\n", + "Epoch 41/1000 \t Train Err: 2.3030\n", + "Epoch 42/1000 \t Train Err: 2.3027\n", + "Epoch 43/1000 \t Train Err: 2.3027\n", + "Epoch 44/1000 \t Train Err: 2.3027\n", + "Epoch 45/1000 \t Train Err: 2.3027\n", + "Epoch 46/1000 \t Train Err: 2.3026\n", + "Epoch 47/1000 \t Train Err: 2.3026\n", + "Epoch 48/1000 \t Train Err: 2.3027\n", + "Epoch 49/1000 \t Train Err: 2.3026\n", + "Epoch 
50/1000 \t Train Err: 2.3027\n", + "Epoch 51/1000 \t Train Err: 2.3027\n", + "Epoch 52/1000 \t Train Err: 2.3027\n", + "Epoch 53/1000 \t Train Err: 2.3026\n", + "Epoch 54/1000 \t Train Err: 2.3028\n", + "Epoch 55/1000 \t Train Err: 2.3027\n", + "Epoch 56/1000 \t Train Err: 2.3026\n", + "Epoch 57/1000 \t Train Err: 2.3027\n", + "Epoch 58/1000 \t Train Err: 2.3026\n", + "Epoch 59/1000 \t Train Err: 2.3026\n", + "Epoch 60/1000 \t Train Err: 2.3027\n", + "Epoch 61/1000 \t Train Err: 2.3026\n", + "Epoch 62/1000 \t Train Err: 2.3026\n", + "Epoch 500/1000 \t Train Err: 2.3026\n", + "Epoch 501/1000 \t Train Err: 2.3025\n", + "Epoch 502/1000 \t Train Err: 2.3026\n", + "Epoch 503/1000 \t Train Err: 2.3026\n", + "Epoch 504/1000 \t Train Err: 2.3026\n", + "Epoch 505/1000 \t Train Err: 2.3026\n", + "Epoch 506/1000 \t Train Err: 2.3026\n", + "Epoch 507/1000 \t Train Err: 2.3026\n", + "Epoch 508/1000 \t Train Err: 2.3026\n", + "Epoch 509/1000 \t Train Err: 2.3026\n", + "Epoch 510/1000 \t Train Err: 2.3026\n", + "Epoch 511/1000 \t Train Err: 2.3026\n", + "Epoch 512/1000 \t Train Err: 2.3026\n", + "Epoch 513/1000 \t Train Err: 2.3026\n", + "Epoch 514/1000 \t Train Err: 2.3025\n", + "Epoch 515/1000 \t Train Err: 2.3027\n", + "Epoch 516/1000 \t Train Err: 2.3027\n", + "Epoch 517/1000 \t Train Err: 2.3027\n", + "Epoch 518/1000 \t Train Err: 2.3026\n", + "Epoch 519/1000 \t Train Err: 2.3026\n", + "Epoch 520/1000 \t Train Err: 2.3026\n", + "Epoch 521/1000 \t Train Err: 2.3025\n", + "Epoch 522/1000 \t Train Err: 2.3027\n", + "Epoch 523/1000 \t Train Err: 2.3027\n", + "Epoch 524/1000 \t Train Err: 2.3027\n", + "Epoch 525/1000 \t Train Err: 2.3026\n", + "Epoch 526/1000 \t Train Err: 2.3026\n", + "Epoch 527/1000 \t Train Err: 2.3026\n", + "Epoch 528/1000 \t Train Err: 2.3027\n", + "Epoch 529/1000 \t Train Err: 2.3028\n", + "Epoch 530/1000 \t Train Err: 2.3026\n", + "Epoch 531/1000 \t Train Err: 2.3026\n", + "Epoch 532/1000 \t Train Err: 2.3026\n", + "Epoch 533/1000 \t Train Err: 2.3026\n", 
+ "Epoch 534/1000 \t Train Err: 2.3026\n", + "Epoch 535/1000 \t Train Err: 2.3026\n", + "Epoch 536/1000 \t Train Err: 2.3027\n", + "Epoch 537/1000 \t Train Err: 2.3027\n", + "Epoch 538/1000 \t Train Err: 2.3025\n", + "Epoch 539/1000 \t Train Err: 2.3026\n", + "Epoch 540/1000 \t Train Err: 2.3026\n", + "Epoch 541/1000 \t Train Err: 2.3026\n", + "Epoch 542/1000 \t Train Err: 2.3026\n", + "Epoch 543/1000 \t Train Err: 2.3026\n", + "Epoch 544/1000 \t Train Err: 2.3026\n", + "Epoch 545/1000 \t Train Err: 2.3026\n", + "Epoch 546/1000 \t Train Err: 2.3027\n", + "Epoch 547/1000 \t Train Err: 2.3026\n", + "Epoch 548/1000 \t Train Err: 2.3026\n", + "Epoch 549/1000 \t Train Err: 2.3026\n", + "Epoch 550/1000 \t Train Err: 2.3026\n", + "Epoch 551/1000 \t Train Err: 2.3026\n", + "Epoch 552/1000 \t Train Err: 2.3028\n", + "Epoch 553/1000 \t Train Err: 2.3028\n", + "Epoch 554/1000 \t Train Err: 2.3027\n", + "Epoch 555/1000 \t Train Err: 2.3026\n", + "Epoch 556/1000 \t Train Err: 2.3027\n", + "Epoch 557/1000 \t Train Err: 2.3027\n", + "Epoch 558/1000 \t Train Err: 2.3028\n", + "Epoch 559/1000 \t Train Err: 2.3026\n", + "Epoch 560/1000 \t Train Err: 2.3026\n", + "Epoch 561/1000 \t Train Err: 2.3026\n", + "Epoch 562/1000 \t Train Err: 2.3027\n", + "Epoch 563/1000 \t Train Err: 2.3027\n", + "Epoch 564/1000 \t Train Err: 2.3027\n", + "Epoch 565/1000 \t Train Err: 2.3025\n", + "Epoch 566/1000 \t Train Err: 2.3026\n", + "Epoch 567/1000 \t Train Err: 2.3026\n", + "Epoch 568/1000 \t Train Err: 2.3026\n", + "Epoch 569/1000 \t Train Err: 2.3026\n", + "Epoch 570/1000 \t Train Err: 2.3026\n", + "Epoch 571/1000 \t Train Err: 2.3026\n", + "Epoch 572/1000 \t Train Err: 2.3026\n", + "Epoch 573/1000 \t Train Err: 2.3026\n", + "Epoch 574/1000 \t Train Err: 2.3026\n", + "Epoch 575/1000 \t Train Err: 2.3028\n", + "Epoch 576/1000 \t Train Err: 2.3026\n", + "Epoch 577/1000 \t Train Err: 2.3026\n", + "Epoch 578/1000 \t Train Err: 2.3025\n", + "Epoch 579/1000 \t Train Err: 2.3026\n", + "Epoch 580/1000 \t 
Train Err: 2.3026\n", + "Epoch 581/1000 \t Train Err: 2.3027\n", + "Epoch 582/1000 \t Train Err: 2.3026\n", + "Epoch 583/1000 \t Train Err: 2.3027\n", + "Epoch 584/1000 \t Train Err: 2.3027\n", + "Epoch 585/1000 \t Train Err: 2.3026\n", + "Epoch 586/1000 \t Train Err: 2.3026\n", + "Epoch 587/1000 \t Train Err: 2.3026\n", + "Epoch 588/1000 \t Train Err: 2.3026\n", + "Epoch 589/1000 \t Train Err: 2.3027\n", + "Epoch 590/1000 \t Train Err: 2.3026\n", + "Epoch 591/1000 \t Train Err: 2.3026\n", + "Epoch 592/1000 \t Train Err: 2.3026\n", + "Epoch 593/1000 \t Train Err: 2.3026\n", + "Epoch 594/1000 \t Train Err: 2.3026\n", + "Epoch 595/1000 \t Train Err: 2.3026\n", + "Epoch 596/1000 \t Train Err: 2.3026\n", + "Epoch 597/1000 \t Train Err: 2.3027\n", + "Epoch 598/1000 \t Train Err: 2.3026\n", + "Epoch 599/1000 \t Train Err: 2.3027\n", + "Epoch 600/1000 \t Train Err: 2.3027\n", + "Epoch 601/1000 \t Train Err: 2.3026\n", + "Epoch 602/1000 \t Train Err: 2.3026\n", + "Epoch 603/1000 \t Train Err: 2.3026\n", + "Epoch 604/1000 \t Train Err: 2.3026\n", + "Epoch 605/1000 \t Train Err: 2.3026\n", + "Epoch 606/1000 \t Train Err: 2.3027\n", + "Epoch 607/1000 \t Train Err: 2.3026\n", + "Epoch 608/1000 \t Train Err: 2.3026\n", + "Epoch 609/1000 \t Train Err: 2.3026\n", + "Epoch 610/1000 \t Train Err: 2.3026\n", + "Epoch 611/1000 \t Train Err: 2.3026\n", + "Epoch 612/1000 \t Train Err: 2.3027\n", + "Epoch 613/1000 \t Train Err: 2.3025\n", + "Epoch 614/1000 \t Train Err: 2.3026\n", + "Epoch 615/1000 \t Train Err: 2.3026\n", + "Epoch 616/1000 \t Train Err: 2.3026\n", + "Epoch 617/1000 \t Train Err: 2.3026\n", + "Epoch 618/1000 \t Train Err: 2.3026\n", + "Epoch 619/1000 \t Train Err: 2.3026\n", + "Epoch 620/1000 \t Train Err: 2.3026\n", + "Epoch 621/1000 \t Train Err: 2.3026\n", + "Epoch 622/1000 \t Train Err: 2.3026\n", + "Epoch 623/1000 \t Train Err: 2.3026\n", + "Epoch 624/1000 \t Train Err: 2.3026\n", + "Epoch 625/1000 \t Train Err: 2.3026\n", + "Epoch 626/1000 \t Train Err: 2.3026\n", 
+ "Epoch 627/1000 \t Train Err: 2.3026\n", + "Epoch 628/1000 \t Train Err: 2.3026\n", + "Epoch 629/1000 \t Train Err: 2.3026\n", + "Epoch 630/1000 \t Train Err: 2.3027\n", + "Epoch 631/1000 \t Train Err: 2.3026\n", + "Epoch 632/1000 \t Train Err: 2.3026\n", + "Epoch 633/1000 \t Train Err: 2.3025\n", + "Epoch 634/1000 \t Train Err: 2.3026\n", + "Epoch 635/1000 \t Train Err: 2.3026\n", + "Epoch 636/1000 \t Train Err: 2.3026\n", + "Epoch 637/1000 \t Train Err: 2.3026\n", + "Epoch 638/1000 \t Train Err: 2.3026\n", + "Epoch 639/1000 \t Train Err: 2.3027\n", + "Epoch 640/1000 \t Train Err: 2.3026\n", + "Epoch 641/1000 \t Train Err: 2.3026\n", + "Epoch 642/1000 \t Train Err: 2.3026\n", + "Epoch 643/1000 \t Train Err: 2.3026\n", + "Epoch 644/1000 \t Train Err: 2.3027\n", + "Epoch 645/1000 \t Train Err: 2.3026\n", + "Epoch 646/1000 \t Train Err: 2.3026\n", + "Epoch 647/1000 \t Train Err: 2.3025\n", + "Epoch 648/1000 \t Train Err: 2.3026\n", + "Epoch 649/1000 \t Train Err: 2.3026\n", + "Epoch 650/1000 \t Train Err: 2.3025\n", + "Epoch 651/1000 \t Train Err: 2.3026\n", + "Epoch 652/1000 \t Train Err: 2.3025\n", + "Epoch 653/1000 \t Train Err: 2.3026\n", + "Epoch 654/1000 \t Train Err: 2.3026\n", + "Epoch 655/1000 \t Train Err: 2.3026\n", + "Epoch 656/1000 \t Train Err: 2.3026\n", + "Epoch 657/1000 \t Train Err: 2.3025\n", + "Epoch 658/1000 \t Train Err: 2.3026\n", + "Epoch 659/1000 \t Train Err: 2.3025\n", + "Epoch 660/1000 \t Train Err: 2.3025\n", + "Epoch 661/1000 \t Train Err: 2.3025\n", + "Epoch 662/1000 \t Train Err: 2.3026\n", + "Epoch 663/1000 \t Train Err: 2.3026\n", + "Epoch 664/1000 \t Train Err: 2.3025\n", + "Epoch 665/1000 \t Train Err: 2.3026\n", + "Epoch 666/1000 \t Train Err: 2.3026\n", + "Epoch 667/1000 \t Train Err: 2.3025\n", + "Epoch 668/1000 \t Train Err: 2.3026\n", + "Epoch 669/1000 \t Train Err: 2.3026\n", + "Epoch 670/1000 \t Train Err: 2.3025\n", + "Epoch 671/1000 \t Train Err: 2.3026\n", + "Epoch 672/1000 \t Train Err: 2.3025\n", + "Epoch 673/1000 \t 
Train Err: 2.3024\n", + "Epoch 674/1000 \t Train Err: 2.3024\n", + "Epoch 675/1000 \t Train Err: 2.3024\n", + "Epoch 676/1000 \t Train Err: 2.3024\n", + "Epoch 677/1000 \t Train Err: 2.3023\n", + "Epoch 678/1000 \t Train Err: 2.3024\n", + "Epoch 679/1000 \t Train Err: 2.3022\n", + "Epoch 680/1000 \t Train Err: 2.3022\n", + "Epoch 681/1000 \t Train Err: 2.3022\n", + "Epoch 682/1000 \t Train Err: 2.3020\n", + "Epoch 683/1000 \t Train Err: 2.3018\n", + "Epoch 684/1000 \t Train Err: 2.3016\n", + "Epoch 685/1000 \t Train Err: 2.3014\n", + "Epoch 686/1000 \t Train Err: 2.3011\n", + "Epoch 687/1000 \t Train Err: 2.3007\n", + "Epoch 688/1000 \t Train Err: 2.3007\n", + "Epoch 689/1000 \t Train Err: 2.2999\n", + "Epoch 690/1000 \t Train Err: 2.2999\n", + "Epoch 691/1000 \t Train Err: 2.2993\n", + "Epoch 692/1000 \t Train Err: 2.2993\n", + "Epoch 693/1000 \t Train Err: 2.2988\n", + "Epoch 694/1000 \t Train Err: 2.2987\n", + "Epoch 695/1000 \t Train Err: 2.2983\n", + "Epoch 696/1000 \t Train Err: 2.2976\n", + "Epoch 697/1000 \t Train Err: 2.2974\n", + "Epoch 698/1000 \t Train Err: 2.2969\n", + "Epoch 699/1000 \t Train Err: 2.2975\n", + "Epoch 700/1000 \t Train Err: 2.2955\n", + "Epoch 701/1000 \t Train Err: 2.2967\n", + "Epoch 702/1000 \t Train Err: 2.2958\n", + "Epoch 703/1000 \t Train Err: 2.2933\n", + "Epoch 704/1000 \t Train Err: 2.2951\n", + "Epoch 705/1000 \t Train Err: 2.2939\n", + "Epoch 706/1000 \t Train Err: 2.2922\n", + "Epoch 707/1000 \t Train Err: 2.2919\n", + "Epoch 708/1000 \t Train Err: 2.2901\n", + "Epoch 709/1000 \t Train Err: 2.2897\n", + "Epoch 710/1000 \t Train Err: 2.2867\n", + "Epoch 711/1000 \t Train Err: 2.2855\n", + "Epoch 712/1000 \t Train Err: 2.2841\n", + "Epoch 713/1000 \t Train Err: 2.2844\n", + "Epoch 714/1000 \t Train Err: 2.2812\n", + "Epoch 715/1000 \t Train Err: 2.2801\n", + "Epoch 716/1000 \t Train Err: 2.2789\n", + "Epoch 717/1000 \t Train Err: 2.2761\n", + "Epoch 718/1000 \t Train Err: 2.2797\n", + "Epoch 719/1000 \t Train Err: 2.2796\n", 
+ "Epoch 720/1000 \t Train Err: 2.2974\n", + "Epoch 721/1000 \t Train Err: 2.2786\n", + "Epoch 722/1000 \t Train Err: 2.2802\n", + "Epoch 723/1000 \t Train Err: 2.2805\n", + "Epoch 724/1000 \t Train Err: 2.2812\n", + "Epoch 725/1000 \t Train Err: 2.2812\n", + "Epoch 726/1000 \t Train Err: 2.2792\n", + "Epoch 727/1000 \t Train Err: 2.2780\n", + "Epoch 728/1000 \t Train Err: 2.2775\n", + "Epoch 729/1000 \t Train Err: 2.2750\n", + "Epoch 730/1000 \t Train Err: 2.2821\n", + "Epoch 731/1000 \t Train Err: 2.2815\n", + "Epoch 732/1000 \t Train Err: 2.2812\n", + "Epoch 733/1000 \t Train Err: 2.2779\n", + "Epoch 734/1000 \t Train Err: 2.2777\n", + "Epoch 735/1000 \t Train Err: 2.2799\n", + "Epoch 736/1000 \t Train Err: 2.2754\n", + "Epoch 737/1000 \t Train Err: 2.2742\n", + "Epoch 738/1000 \t Train Err: 2.2723\n", + "Epoch 739/1000 \t Train Err: 2.2719\n", + "Epoch 740/1000 \t Train Err: 2.2674\n", + "Epoch 741/1000 \t Train Err: 2.2694\n", + "Epoch 742/1000 \t Train Err: 2.2702\n", + "Epoch 743/1000 \t Train Err: 2.2693\n", + "Epoch 744/1000 \t Train Err: 2.2722\n", + "Epoch 745/1000 \t Train Err: 2.2704\n", + "Epoch 746/1000 \t Train Err: 2.2675\n", + "Epoch 747/1000 \t Train Err: 2.2644\n", + "Epoch 748/1000 \t Train Err: 2.2599\n", + "Epoch 749/1000 \t Train Err: 2.2583\n", + "Epoch 750/1000 \t Train Err: 2.2578\n", + "Epoch 751/1000 \t Train Err: 2.2507\n", + "Epoch 752/1000 \t Train Err: 2.2490\n", + "Epoch 753/1000 \t Train Err: 2.2501\n", + "Epoch 754/1000 \t Train Err: 2.2502\n", + "Epoch 755/1000 \t Train Err: 2.2520\n", + "Epoch 756/1000 \t Train Err: 2.2435\n", + "Epoch 757/1000 \t Train Err: 2.2432\n", + "Epoch 758/1000 \t Train Err: 2.2420\n", + "Epoch 759/1000 \t Train Err: 2.2393\n", + "Epoch 760/1000 \t Train Err: 2.2372\n", + "Epoch 761/1000 \t Train Err: 2.2302\n", + "Epoch 762/1000 \t Train Err: 2.2302\n", + "Epoch 763/1000 \t Train Err: 2.2294\n", + "Epoch 764/1000 \t Train Err: 2.2201\n", + "Epoch 765/1000 \t Train Err: 2.2195\n", + "Epoch 766/1000 \t 
Train Err: 2.2166\n", + "Epoch 767/1000 \t Train Err: 2.2139\n", + "Epoch 768/1000 \t Train Err: 2.2096\n", + "Epoch 769/1000 \t Train Err: 2.2100\n", + "Epoch 770/1000 \t Train Err: 2.2073\n", + "Epoch 771/1000 \t Train Err: 2.2058\n", + "Epoch 772/1000 \t Train Err: 2.2096\n", + "Epoch 773/1000 \t Train Err: 2.2055\n", + "Epoch 774/1000 \t Train Err: 2.2213\n", + "Epoch 775/1000 \t Train Err: 2.2435\n", + "Epoch 776/1000 \t Train Err: 2.2282\n", + "Epoch 777/1000 \t Train Err: 2.2328\n", + "Epoch 778/1000 \t Train Err: 2.2254\n", + "Epoch 779/1000 \t Train Err: 2.2246\n", + "Epoch 780/1000 \t Train Err: 2.2241\n", + "Epoch 781/1000 \t Train Err: 2.2217\n", + "Epoch 782/1000 \t Train Err: 2.2156\n", + "Epoch 783/1000 \t Train Err: 2.2219\n", + "Epoch 784/1000 \t Train Err: 2.2151\n", + "Epoch 785/1000 \t Train Err: 2.2259\n", + "Epoch 786/1000 \t Train Err: 2.2226\n", + "Epoch 787/1000 \t Train Err: 2.2176\n", + "Epoch 788/1000 \t Train Err: 2.2152\n", + "Epoch 789/1000 \t Train Err: 2.2099\n", + "Epoch 790/1000 \t Train Err: 2.2069\n", + "Epoch 791/1000 \t Train Err: 2.2034\n", + "Epoch 792/1000 \t Train Err: 2.2080\n", + "Epoch 793/1000 \t Train Err: 2.1999\n", + "Epoch 794/1000 \t Train Err: 2.1925\n", + "Epoch 795/1000 \t Train Err: 2.1840\n", + "Epoch 796/1000 \t Train Err: 2.1820\n", + "Epoch 797/1000 \t Train Err: 2.1907\n", + "Epoch 798/1000 \t Train Err: 2.1835\n", + "Epoch 799/1000 \t Train Err: 2.1886\n", + "Epoch 800/1000 \t Train Err: 2.1807\n", + "Epoch 801/1000 \t Train Err: 2.1841\n", + "Epoch 802/1000 \t Train Err: 2.1776\n", + "Epoch 803/1000 \t Train Err: 2.1800\n", + "Epoch 804/1000 \t Train Err: 2.1715\n", + "Epoch 805/1000 \t Train Err: 2.1717\n", + "Epoch 806/1000 \t Train Err: 2.1701\n", + "Epoch 807/1000 \t Train Err: 2.1635\n", + "Epoch 808/1000 \t Train Err: 2.1664\n", + "Epoch 809/1000 \t Train Err: 2.1603\n", + "Epoch 810/1000 \t Train Err: 2.1636\n", + "Epoch 811/1000 \t Train Err: 2.1575\n", + "Epoch 812/1000 \t Train Err: 2.1587\n", 
+ "Epoch 813/1000 \t Train Err: 2.1559\n", + "Epoch 814/1000 \t Train Err: 2.1540\n", + "Epoch 815/1000 \t Train Err: 2.1537\n", + "Epoch 816/1000 \t Train Err: 2.1514\n", + "Epoch 817/1000 \t Train Err: 2.1500\n", + "Epoch 818/1000 \t Train Err: 2.1488\n", + "Epoch 819/1000 \t Train Err: 2.1475\n", + "Epoch 820/1000 \t Train Err: 2.1447\n", + "Epoch 821/1000 \t Train Err: 2.1434\n", + "Epoch 822/1000 \t Train Err: 2.1431\n", + "Epoch 823/1000 \t Train Err: 2.1441\n", + "Epoch 824/1000 \t Train Err: 2.1816\n", + "Epoch 825/1000 \t Train Err: 2.1863\n", + "Epoch 826/1000 \t Train Err: 2.1601\n", + "Epoch 827/1000 \t Train Err: 2.1623\n", + "Epoch 828/1000 \t Train Err: 2.1957\n", + "Epoch 829/1000 \t Train Err: 2.1775\n", + "Epoch 830/1000 \t Train Err: 2.1971\n", + "Epoch 831/1000 \t Train Err: 2.1851\n", + "Epoch 832/1000 \t Train Err: 2.1738\n", + "Epoch 833/1000 \t Train Err: 2.1654\n", + "Epoch 834/1000 \t Train Err: 2.1627\n", + "Epoch 835/1000 \t Train Err: 2.1606\n", + "Epoch 836/1000 \t Train Err: 2.1487\n", + "Epoch 837/1000 \t Train Err: 2.1494\n", + "Epoch 838/1000 \t Train Err: 2.1563\n", + "Epoch 839/1000 \t Train Err: 2.1521\n", + "Epoch 840/1000 \t Train Err: 2.1515\n", + "Epoch 841/1000 \t Train Err: 2.1484\n", + "Epoch 842/1000 \t Train Err: 2.1476\n", + "Epoch 843/1000 \t Train Err: 2.1406\n", + "Epoch 844/1000 \t Train Err: 2.1410\n", + "Epoch 845/1000 \t Train Err: 2.1359\n", + "Epoch 846/1000 \t Train Err: 2.1344\n", + "Epoch 847/1000 \t Train Err: 2.1323\n", + "Epoch 848/1000 \t Train Err: 2.1236\n", + "Epoch 849/1000 \t Train Err: 2.1241\n", + "Epoch 850/1000 \t Train Err: 2.1162\n", + "Epoch 851/1000 \t Train Err: 2.1179\n", + "Epoch 852/1000 \t Train Err: 2.1033\n", + "Epoch 853/1000 \t Train Err: 2.1022\n", + "Epoch 854/1000 \t Train Err: 2.1009\n", + "Epoch 855/1000 \t Train Err: 2.0978\n", + "Epoch 856/1000 \t Train Err: 2.0911\n", + "Epoch 857/1000 \t Train Err: 2.0932\n", + "Epoch 858/1000 \t Train Err: 2.0898\n", + "Epoch 859/1000 \t 
Train Err: 2.0844\n", + "Epoch 860/1000 \t Train Err: 2.0767\n", + "Epoch 861/1000 \t Train Err: 2.0732\n", + "Epoch 862/1000 \t Train Err: 2.0769\n", + "Epoch 863/1000 \t Train Err: 2.0725\n", + "Epoch 864/1000 \t Train Err: 2.0700\n", + "Epoch 865/1000 \t Train Err: 2.0612\n", + "Epoch 866/1000 \t Train Err: 2.0637\n", + "Epoch 867/1000 \t Train Err: 2.0580\n", + "Epoch 868/1000 \t Train Err: 2.0598\n", + "Epoch 869/1000 \t Train Err: 2.0535\n", + "Epoch 870/1000 \t Train Err: 2.0503\n", + "Epoch 871/1000 \t Train Err: 2.0492\n", + "Epoch 872/1000 \t Train Err: 2.0431\n", + "Epoch 873/1000 \t Train Err: 2.0423\n", + "Epoch 874/1000 \t Train Err: 2.0382\n", + "Epoch 875/1000 \t Train Err: 2.0328\n", + "Epoch 876/1000 \t Train Err: 2.0313\n", + "Epoch 877/1000 \t Train Err: 2.0280\n", + "Epoch 878/1000 \t Train Err: 2.0297\n", + "Epoch 879/1000 \t Train Err: 2.0243\n", + "Epoch 880/1000 \t Train Err: 2.0243\n", + "Epoch 881/1000 \t Train Err: 2.0222\n", + "Epoch 882/1000 \t Train Err: 2.0209\n", + "Epoch 883/1000 \t Train Err: 2.0161\n", + "Epoch 884/1000 \t Train Err: 2.0157\n", + "Epoch 885/1000 \t Train Err: 2.0253\n", + "Epoch 886/1000 \t Train Err: 2.0697\n", + "Epoch 887/1000 \t Train Err: 2.2021\n", + "Epoch 888/1000 \t Train Err: 2.2692\n", + "Epoch 889/1000 \t Train Err: 2.1106\n", + "Epoch 890/1000 \t Train Err: 2.1653\n", + "Epoch 891/1000 \t Train Err: 2.2021\n", + "Epoch 892/1000 \t Train Err: 2.1370\n", + "Epoch 893/1000 \t Train Err: 2.1576\n", + "Epoch 894/1000 \t Train Err: 2.1296\n", + "Epoch 895/1000 \t Train Err: 2.1303\n", + "Epoch 896/1000 \t Train Err: 2.1201\n", + "Epoch 897/1000 \t Train Err: 2.1001\n", + "Epoch 898/1000 \t Train Err: 2.1209\n", + "Epoch 899/1000 \t Train Err: 2.1034\n", + "Epoch 900/1000 \t Train Err: 2.1103\n", + "Epoch 901/1000 \t Train Err: 2.0983\n", + "Epoch 902/1000 \t Train Err: 2.0762\n", + "Epoch 903/1000 \t Train Err: 2.0929\n", + "Epoch 904/1000 \t Train Err: 2.0643\n", + "Epoch 905/1000 \t Train Err: 2.0555\n", 
+ "Epoch 906/1000 \t Train Err: 2.0589\n", + "Epoch 907/1000 \t Train Err: 2.0454\n", + "Epoch 908/1000 \t Train Err: 2.0500\n", + "Epoch 909/1000 \t Train Err: 2.0418\n", + "Epoch 910/1000 \t Train Err: 2.0363\n", + "Epoch 911/1000 \t Train Err: 2.0357\n", + "Epoch 912/1000 \t Train Err: 2.0323\n", + "Epoch 913/1000 \t Train Err: 2.0282\n", + "Epoch 914/1000 \t Train Err: 2.0242\n", + "Epoch 915/1000 \t Train Err: 2.0120\n", + "Epoch 916/1000 \t Train Err: 2.0127\n", + "Epoch 917/1000 \t Train Err: 2.0133\n", + "Epoch 918/1000 \t Train Err: 2.0097\n", + "Epoch 919/1000 \t Train Err: 2.0087\n", + "Epoch 920/1000 \t Train Err: 2.0099\n", + "Epoch 921/1000 \t Train Err: 2.0076\n", + "Epoch 922/1000 \t Train Err: 2.0020\n", + "Epoch 923/1000 \t Train Err: 1.9990\n", + "Epoch 924/1000 \t Train Err: 1.9967\n", + "Epoch 925/1000 \t Train Err: 1.9966\n", + "Epoch 926/1000 \t Train Err: 1.9946\n", + "Epoch 927/1000 \t Train Err: 1.9904\n", + "Epoch 928/1000 \t Train Err: 1.9874\n", + "Epoch 929/1000 \t Train Err: 1.9974\n", + "Epoch 930/1000 \t Train Err: 1.9857\n", + "Epoch 931/1000 \t Train Err: 1.9892\n", + "Epoch 932/1000 \t Train Err: 1.9947\n", + "Epoch 933/1000 \t Train Err: 1.9974\n", + "Epoch 934/1000 \t Train Err: 2.0159\n", + "Epoch 935/1000 \t Train Err: 2.0433\n", + "Epoch 936/1000 \t Train Err: 2.0755\n", + "Epoch 937/1000 \t Train Err: 2.0014\n", + "Epoch 938/1000 \t Train Err: 2.0443\n", + "Epoch 939/1000 \t Train Err: 2.0184\n", + "Epoch 940/1000 \t Train Err: 2.0192\n", + "Epoch 941/1000 \t Train Err: 2.0248\n", + "Epoch 942/1000 \t Train Err: 2.0124\n", + "Epoch 943/1000 \t Train Err: 2.0101\n", + "Epoch 944/1000 \t Train Err: 2.0024\n", + "Epoch 945/1000 \t Train Err: 2.0011\n", + "Epoch 946/1000 \t Train Err: 1.9871\n", + "Epoch 947/1000 \t Train Err: 1.9816\n", + "Epoch 948/1000 \t Train Err: 1.9875\n", + "Epoch 949/1000 \t Train Err: 2.0660\n", + "Epoch 950/1000 \t Train Err: 2.0591\n", + "Epoch 951/1000 \t Train Err: 2.0214\n", + "Epoch 952/1000 \t 
Train Err: 2.0312\n", + "Epoch 953/1000 \t Train Err: 2.0470\n", + "Epoch 954/1000 \t Train Err: 2.0365\n", + "Epoch 955/1000 \t Train Err: 2.0143\n", + "Epoch 956/1000 \t Train Err: 2.0104\n", + "Epoch 957/1000 \t Train Err: 2.0289\n", + "Epoch 958/1000 \t Train Err: 2.0097\n", + "Epoch 959/1000 \t Train Err: 1.9998\n", + "Epoch 960/1000 \t Train Err: 2.0095\n", + "Epoch 961/1000 \t Train Err: 2.0110\n", + "Epoch 962/1000 \t Train Err: 2.0009\n", + "Epoch 963/1000 \t Train Err: 1.9930\n", + "Epoch 964/1000 \t Train Err: 2.0003\n", + "Epoch 965/1000 \t Train Err: 1.9912\n", + "Epoch 966/1000 \t Train Err: 1.9859\n", + "Epoch 967/1000 \t Train Err: 1.9843\n", + "Epoch 968/1000 \t Train Err: 1.9828\n", + "Epoch 969/1000 \t Train Err: 1.9776\n", + "Epoch 970/1000 \t Train Err: 1.9790\n", + "Epoch 971/1000 \t Train Err: 1.9697\n", + "Epoch 972/1000 \t Train Err: 1.9671\n", + "Epoch 973/1000 \t Train Err: 1.9673\n", + "Epoch 974/1000 \t Train Err: 1.9585\n", + "Epoch 975/1000 \t Train Err: 1.9605\n", + "Epoch 976/1000 \t Train Err: 1.9537\n", + "Epoch 977/1000 \t Train Err: 1.9529\n", + "Epoch 978/1000 \t Train Err: 1.9477\n", + "Epoch 979/1000 \t Train Err: 1.9485\n", + "Epoch 980/1000 \t Train Err: 1.9376\n", + "Epoch 981/1000 \t Train Err: 1.9426\n", + "Epoch 982/1000 \t Train Err: 1.9416\n", + "Epoch 983/1000 \t Train Err: 1.9334\n", + "Epoch 984/1000 \t Train Err: 1.9249\n", + "Epoch 985/1000 \t Train Err: 1.9216\n", + "Epoch 986/1000 \t Train Err: 1.9268\n", + "Epoch 987/1000 \t Train Err: 1.9630\n", + "Epoch 988/1000 \t Train Err: 2.0237\n", + "Epoch 989/1000 \t Train Err: 2.0037\n", + "Epoch 990/1000 \t Train Err: 1.9824\n", + "Epoch 991/1000 \t Train Err: 1.9718\n", + "Epoch 992/1000 \t Train Err: 1.9726\n", + "Epoch 993/1000 \t Train Err: 1.9536\n", + "Epoch 994/1000 \t Train Err: 1.9662\n", + "Epoch 995/1000 \t Train Err: 1.9492\n", + "Epoch 996/1000 \t Train Err: 1.9482\n", + "Epoch 997/1000 \t Train Err: 1.9375\n", + "Epoch 998/1000 \t Train Err: 1.9492\n", 
+ "Epoch 999/1000 \t Train Err: 1.9351\n" + ] + } + ], + "source": [ + "model.train()\n", + "for epoch in range(NEPOCHS):\n", + " optimizer.zero_grad()\n", + " data, labels = mkbatch_rtl(BSZ)\n", + " # shift labels to prevent cheating\n", + " shifted_labels = torch.roll(labels, 1, dims=1)\n", + " shifted_labels[:, 0] = VOCAB_SIZE # start token\n", + " outputs = model(data, shifted_labels).permute((0, 2, 1))\n", + " loss = criterion(outputs, labels)\n", + " train_loss = loss.item()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " train_err.append(train_loss)\n", + "\n", + " with open('loss', 'a') as f:\n", + " f.write(f\"{train_loss}\\n\")\n", + " print(f\"Epoch {epoch}/{NEPOCHS} \\t Train Err: {train_loss:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "execution_state": "idle", + "id": "a3c41150-4541-4722-83a7-e7ad937f6c4f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[3, 8, 4, 0]], device='cuda:0') tensor([[7, 8]], device='cuda:0')\n", + "tensor([-4.4248e+00, -1.0567e+00, 1.2971e+00, -2.0221e+00, -6.6597e-01,\n", + " -2.6027e+00, -1.5254e-02, 8.1894e+00, -1.6939e-03, -1.2252e+00],\n", + " device='cuda:0')\n", + "tensor([-3.7663, -1.7898, -1.4273, 1.9667, -2.3513, -4.7138, -2.2421, 3.6817,\n", + " 8.9049, 3.1622], device='cuda:0')\n", + "tensor([[7, 8]], device='cuda:0', dtype=torch.int32) tensor([[7, 8]], device='cuda:0')\n" + ] + } + ], + "source": [ + "model.eval()\n", + "data, labels = mkbatch_rtl(1)\n", + "print(data, labels)\n", + "with torch.no_grad():\n", + " ans = torch.zeros((1, NUM_LEN), dtype=torch.int, device=device)\n", + " ans[0, 0] = VOCAB_SIZE\n", + " for i in range(NUM_LEN):\n", + " outputs = model(data, ans)\n", + " print(outputs[0, i])\n", + " # break\n", + " ans[0, (i + 1) % NUM_LEN] = torch.argmax(outputs[0, i])\n", + "ans = torch.roll(ans, -1, dims=1)\n", + "print(ans, labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + 
"execution_state": "idle", + "id": "1843b944-bab5-40ee-b26e-5d3b87ea9454", + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'add-ltr-loss'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[32], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmath\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43madd-ltr-loss\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mplot(\u001b[38;5;28mrange\u001b[39m(NEPOCHS), \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mmap\u001b[39m(\u001b[38;5;28;01mlambda\u001b[39;00m x: math\u001b[38;5;241m.\u001b[39mlog(\u001b[38;5;28mfloat\u001b[39m(x)), f\u001b[38;5;241m.\u001b[39mreadlines())))\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madd-rtl-loss\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n", + "File \u001b[0;32m~/.venv/lib64/python3.12/site-packages/IPython/core/interactiveshell.py:324\u001b[0m, in \u001b[0;36m_modified_open\u001b[0;34m(file, *args, **kwargs)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, 
\u001b[38;5;241m2\u001b[39m}:\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 319\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 320\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 322\u001b[0m )\n\u001b[0;32m--> 324\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mio_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'add-ltr-loss'" + ] + } + ], + "source": [ + "import math\n", + "import matplotlib.pyplot as plt\n", + "\n", + "with open(\"add-ltr-loss\") as f:\n", + " plt.plot(range(NEPOCHS), list(map(lambda x: math.log(float(x)), f.readlines())))\n", + "with open(\"add-rtl-loss\") as f:\n", + " plt.plot(range(NEPOCHS), list(map(lambda x: math.log(float(x)), f.readlines())))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b97b349f-f20b-441d-8c7f-1724e8cf30cc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + 
"name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/japanese.ipynb b/notebooks/japanese.ipynb new file mode 100644 index 0000000..4112019 --- /dev/null +++ b/notebooks/japanese.ipynb @@ -0,0 +1,489 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "execution_state": "idle", + "id": "1ddfc692-bda7-4d38-a549-2fb0d40d437d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.\n", + " - If you're using `trust_remote_code=True`, you can get rid of this warning by loading the model with an auto class. See https://huggingface.co/docs/transformers/en/model_doc/auto#auto-classes\n", + " - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).\n", + " - If you are not the owner of the model architecture class, please contact the model code owner to update it.\n", + "Some weights of the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_base_ltr/epoch_3_checkpt were not used when initializing BertForMaskedLM: ['attention_mask']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertForMaskedLM were not initialized from the model checkpoint at /home/sipb/nlp-class-project/checkpoints/bert_base_ltr/epoch_3_checkpt and are newly initialized because the shapes did not match:\n", + "- bert.embeddings.position_embeddings.weight: found shape torch.Size([512, 768]) in the checkpoint and torch.Size([128, 768]) in the model instantiated\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "import math\n", + "import sys\n", + "\n", + "sys.path.append(\"..\")\n", + "\n", + "import torch\n", + "import transformers\n", + "\n", + "import pandas as pd\n", + "\n", + "from utils import add_attn_hooks\n", + "\n", + "# text_dir = \"rtl\"\n", + "text_dir = \"ltr\"\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "model = transformers.AutoModelForMaskedLM.from_pretrained(f\"/home/sipb/nlp-class-project/checkpoints/bert_base_{text_dir}/epoch_3_checkpt\", ignore_mismatched_sizes=True)\n", + "\n", + "# tokenizer = transformers.AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")\n", + "# model = transformers.AutoModelForMaskedLM.from_pretrained(f\"/home/sipb/nlp-class-project/checkpoints/distilbert_base_{text_dir}/epoch_3_checkpt\", ignore_mismatched_sizes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "145d2ffd-db55-4b8f-9fbb-85a51e0b3d11", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "execution_state": "idle", + "id": "a732375b-1682-45c6-8df0-8db1458559c9", + "metadata": {}, + 
"outputs": [], + "source": [ + "add_attn_hooks(model, text_dir)\n", + "model.eval();" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "execution_state": "idle", + "id": "041d1702-5aaf-45f0-9413-4014b315d1ed", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_parquet('/home/sipb/nlp-class-project/data/japan.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "execution_state": "idle", + "id": "2bace74b-a716-4d49-a912-53155cf002ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b'\n", + "_START_ARTICLE_\n", + "ビートたけしの教科書に載らない日本人の謎\n", + "_START_SECTION_\n", + "概要\n", + "_START_PARAGRAPH_\n", + "「教科書には決して載らない」日本人の謎やしきたりを多角的に検証し、日本人のDNAを解明する。_NEWLINE_新春番組として定期的に放送されており、年末の午前中に再放送されるのが恒例となっている。'\n" + ] + } + ], + "source": [ + "df[\"text\"][0]\n", + "import codecs\n", + "decoded_str = codecs.escape_decode(df[\"text\"][0])[0].decode('utf-8')\n", + "print(decoded_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "execution_state": "idle", + "id": "8a9147ea-d9dc-4826-8030-c8417609405d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q : where do pandas live? a : (,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, and,, (,,,,,,.,,,,,,,,,,,,, and\n" + ] + } + ], + "source": [ + "input_text = [\"Q: Where do pandas live? 
A:\"]#, \"ビートたけしの教科書に載らない日\"]\n", + "batch = tokenizer(input_text, return_tensors=\"pt\", padding_side=\"right\", padding=\"max_length\", max_length=64)\n", + "output_ids = model.generate(batch['input_ids'], max_length=128, do_sample=False) # do_sample=False ensures greedy decoding\n", + "decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + "print(decoded_output)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "execution_state": "idle", + "id": "1a7c9b35-0c07-431d-91df-bd2f8c7467eb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MaskedLMOutput(loss=None, logits=tensor([[[ -7.9645, -7.6722, -7.8979, ..., -8.6562, -8.2586, -6.7448],\n", + " [-11.1255, -11.2591, -11.3443, ..., -10.1338, -11.9891, -10.2974],\n", + " [ -8.1256, -8.1880, -7.9874, ..., -8.0597, -8.6987, -10.2472],\n", + " ...,\n", + " [-14.5633, -14.4418, -14.4735, ..., -14.5651, -14.2234, -13.5610],\n", + " [-18.9095, -18.6487, -18.7593, ..., -19.1327, -18.8564, -17.4334],\n", + " [-17.8532, -17.6451, -17.7208, ..., -18.0046, -17.7334, -16.5670]]]), hidden_states=None, attentions=None)\n" + ] + } + ], + "source": [ + "with torch.inference_mode():\n", + " batch = tokenizer([\"ビートたけしの教科書に載らない日本人の謎\"], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " output = model(**batch)\n", + " print(output)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "execution_state": "idle", + "id": "a4098975-2df6-4435-bc93-1a5afd6d7e68", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'riddles' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m 
\u001b[38;5;66;03m# CPU is fast enough\u001b[39;00m\n\u001b[1;32m 3\u001b[0m ppls \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m riddle \u001b[38;5;129;01min\u001b[39;00m \u001b[43mriddles\u001b[49m:\n\u001b[1;32m 5\u001b[0m batch \u001b[38;5;241m=\u001b[39m tokenizer([riddle], return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m, padding_side\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m text_dir \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrtl\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mright\u001b[39m\u001b[38;5;124m\"\u001b[39m, padding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_length\u001b[39m\u001b[38;5;124m\"\u001b[39m, max_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m128\u001b[39m)\n\u001b[1;32m 6\u001b[0m batch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabels\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m batch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mclone()\n", + "\u001b[0;31mNameError\u001b[0m: name 'riddles' is not defined" + ] + } + ], + "source": [ + "# CPU is fast enough\n", + "\n", + "ppls = []\n", + "for riddle in riddles:\n", + " batch = tokenizer([riddle], return_tensors=\"pt\", padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"max_length\", max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].clone()\n", + " batch[\"labels\"][batch[\"attention_mask\"] == 0] = -100\n", + " # batch = tokenizer([riddle], return_tensors=\"pt\")#, padding_side=\"left\" if text_dir == \"rtl\" else \"right\", padding=\"longest\", max_length=128)\n", + " # batch[\"labels\"] = 
batch[\"input_ids\"]\n", + " with torch.inference_mode():\n", + " output = model(**batch)\n", + " ppls.append(math.e ** output.loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "execution_state": "idle", + "id": "c4a82af4-d0d8-415a-9135-3a1350c1402e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(692.7175314596647, 'rtl')" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "execution_state": "idle", + "id": "84a95c66-6dd3-4ccb-96a2-96f38008f70e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(616.6241458855995, 'ltr')" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "execution_state": "idle", + "id": "51ed80f1-a935-42bc-8194-832f91222c45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(526.979384061791, 'rtl')" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir # distilbert" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "execution_state": "idle", + "id": "34a2edec-b1d9-466c-a457-954c587f7817", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(288.22724792187364, 'ltr')" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls), text_dir # distilbert" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "execution_state": "idle", + "id": "40a98c10-59c3-498a-a9e6-c23bd9437bc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "937.8557468023619" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "sum(ppls) / len(ppls)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "execution_state": "idle", + "id": "80b22ba1-e5ba-4f1e-8038-158a2c2f37a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': tensor([[ 101, 1045, 2064, 2022, 2524, 1010, 2021, 1045, 2572, 2025,\n", + " 5024, 1012, 2054, 2572, 1045, 1029, 1037, 15117, 1012, 102,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0]]), 'labels': tensor([[ 101, 1045, 2064, 2022, 2524, 1010, 2021, 1045, 2572, 2025,\n", + " 5024, 1012, 2054, 2572, 1045, 1029, 1037, 15117, 1012, 102,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n", + " -100, -100, -100, -100, -100, -100, -100, -100]])}" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "execution_state": "idle", + "id": "c68b5235-a4a7-4f38-9acb-f5072e546a96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 4., 6., 11., 6., 5., 2., 1., 1., 2., 2.]),\n", + " array([ 613.56297843, 829.36555779, 1045.16813716, 1260.97071653,\n", + " 1476.77329589, 1692.57587526, 1908.37845463, 2124.18103399,\n", + " 2339.98361336, 2555.78619272, 2771.58877209]),\n", + " <BarContainer object of 10 artists>)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.hist(ppls)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "execution_state": "idle", + "id": "8acad3ce-905d-455e-af5d-9770495f374a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 
956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414,\n", + " 956.7294281325414]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ppls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86daa05b-5784-457b-b65e-8b8395128d6f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/loss b/notebooks/loss new file mode 100644 index 0000000..e1e6942 --- /dev/null +++ b/notebooks/loss @@ -0,0 +1,1000 @@ +2.4793217182159424 +2.4309544563293457 +2.3800179958343506 +2.3492820262908936 +2.3288204669952393 +2.3201887607574463 +2.317059278488159 +2.3139350414276123 +2.3094708919525146 +2.3064167499542236 +2.3040108680725098 +2.3028905391693115 +2.302960157394409 
+2.303684949874878 +2.304744243621826 +2.305999517440796 +2.3067049980163574 +2.306713819503784 +2.3067827224731445 +2.3058664798736572 +2.30601167678833 +2.30521559715271 +2.3043692111968994 +2.3038854598999023 +2.3038713932037354 +2.3032853603363037 +2.3032476902008057 +2.3031513690948486 +2.3032004833221436 +2.3028852939605713 +2.3028409481048584 +2.303209066390991 +2.303081750869751 +2.302985668182373 +2.3031227588653564 +2.3030753135681152 +2.303075075149536 +2.3029205799102783 +2.303251266479492 +2.3031625747680664 +2.30310320854187 +2.3030107021331787 +2.302666664123535 +2.3026700019836426 +2.3026769161224365 +2.3026905059814453 +2.3026187419891357 +2.3025882244110107 +2.302690267562866 +2.3026435375213623 +2.302729845046997 +2.3026950359344482 +2.302746534347534 +2.3026206493377686 +2.3027870655059814 +2.3026585578918457 +2.3026347160339355 +2.3026607036590576 +2.3025848865509033 +2.3026163578033447 +2.3026700019836426 +2.3026363849639893 +2.3026375770568848 +2.3026788234710693 +2.302553415298462 +2.3025968074798584 +2.302593469619751 +2.3026044368743896 +2.3025927543640137 +2.3026342391967773 +2.302656412124634 +2.3026087284088135 +2.302560329437256 +2.3025975227355957 +2.3026320934295654 +2.3026249408721924 +2.302595376968384 +2.302572011947632 +2.30261492729187 +2.302673101425171 +2.3026254177093506 +2.3027074337005615 +2.3025972843170166 +2.3026070594787598 +2.3025753498077393 +2.302605390548706 +2.3026022911071777 +2.3026483058929443 +2.3026845455169678 +2.3026163578033447 +2.3026020526885986 +2.3025448322296143 +2.3026366233825684 +2.302520513534546 +2.302574634552002 +2.302612543106079 +2.3025946617126465 +2.302628755569458 +2.302635908126831 +2.3026602268218994 +2.302551507949829 +2.302579164505005 +2.3025732040405273 +2.3026163578033447 +2.3025848865509033 +2.3025896549224854 +2.3026063442230225 +2.302605390548706 +2.3025991916656494 +2.302596092224121 +2.302624464035034 +2.302584409713745 +2.3026046752929688 +2.3025929927825928 +2.3026199340820312 
+2.3025825023651123 +2.3025972843170166 +2.302593469619751 +2.3026280403137207 +2.3026411533355713 +2.302598237991333 +2.302582263946533 +2.302603006362915 +2.302595376968384 +2.3025989532470703 +2.302626848220825 +2.3026373386383057 +2.30259108543396 +2.3026230335235596 +2.3026344776153564 +2.3026301860809326 +2.3026552200317383 +2.302663564682007 +2.3025882244110107 +2.3025946617126465 +2.30256986618042 +2.302544355392456 +2.3026559352874756 +2.3025758266448975 +2.3024582862854004 +2.3026046752929688 +2.3026840686798096 +2.3026156425476074 +2.3026177883148193 +2.302590847015381 +2.3026554584503174 +2.3026623725891113 +2.3026609420776367 +2.302621603012085 +2.3027026653289795 +2.3026010990142822 +2.3026115894317627 +2.302668571472168 +2.3026716709136963 +2.302629232406616 +2.302640199661255 +2.302658796310425 +2.3024585247039795 +2.3025708198547363 +2.3025777339935303 +2.302565574645996 +2.3025872707366943 +2.302602767944336 +2.3025877475738525 +2.302659273147583 +2.3025927543640137 +2.3026325702667236 +2.302605152130127 +2.3026175498962402 +2.302565574645996 +2.302611827850342 +2.302602529525757 +2.3026187419891357 +2.3025996685028076 +2.30258846282959 +2.302598714828491 +2.302558183670044 +2.3025643825531006 +2.3026058673858643 +2.3026177883148193 +2.302593231201172 +2.3026416301727295 +2.3026068210601807 +2.3025405406951904 +2.3026459217071533 +2.302727460861206 +2.30265474319458 +2.3026444911956787 +2.3026580810546875 +2.3025972843170166 +2.3026254177093506 +2.3025786876678467 +2.3026297092437744 +2.302546739578247 +2.3026621341705322 +2.3025929927825928 +2.3025829792022705 +2.302605152130127 +2.302640199661255 +2.3025641441345215 +2.3026347160339355 +2.3025920391082764 +2.302574396133423 +2.302616596221924 +2.3026392459869385 +2.3026092052459717 +2.3026201725006104 +2.3026134967803955 +2.302623987197876 +2.302586793899536 +2.3026044368743896 +2.3027074337005615 +2.3026058673858643 +2.3025760650634766 +2.3026044368743896 +2.3025972843170166 +2.3026139736175537 
+2.3026390075683594 +2.3025991916656494 +2.302525043487549 +2.3026723861694336 +2.302642822265625 +2.3026106357574463 +2.3026137351989746 +2.3026058673858643 +2.3026320934295654 +2.302610158920288 +2.3025975227355957 +2.302581310272217 +2.3026039600372314 +2.302619695663452 +2.3026483058929443 +2.3026204109191895 +2.3026156425476074 +2.3026063442230225 +2.302602767944336 +2.302591562271118 +2.3026509284973145 +2.302588701248169 +2.3025119304656982 +2.302616834640503 +2.302570104598999 +2.3026840686798096 +2.302591323852539 +2.3025944232940674 +2.302640676498413 +2.3025567531585693 +2.3026812076568604 +2.3025763034820557 +2.302642583847046 +2.3026039600372314 +2.3025941848754883 +2.302649736404419 +2.3026223182678223 +2.3026328086853027 +2.3026304244995117 +2.302619457244873 +2.302593946456909 +2.302657127380371 +2.302690267562866 +2.3026061058044434 +2.30265212059021 +2.302602529525757 +2.302621364593506 +2.3025906085968018 +2.302635431289673 +2.3026089668273926 +2.3026158809661865 +2.3026163578033447 +2.302581787109375 +2.302584171295166 +2.3026280403137207 +2.3025906085968018 +2.302751302719116 +2.302713394165039 +2.3026044368743896 +2.3025879859924316 +2.302607774734497 +2.302643299102783 +2.302614212036133 +2.3026540279388428 +2.3026387691497803 +2.302614450454712 +2.3025951385498047 +2.302586317062378 +2.3025786876678467 +2.302649736404419 +2.302600145339966 +2.3026301860809326 +2.302614450454712 +2.302673101425171 +2.302687883377075 +2.3026111125946045 +2.302589178085327 +2.3025310039520264 +2.3026034832000732 +2.302623748779297 +2.3025882244110107 +2.302593946456909 +2.3026247024536133 +2.3026211261749268 +2.3026511669158936 +2.302605152130127 +2.3026161193847656 +2.3026058673858643 +2.3026201725006104 +2.302614212036133 +2.302616834640503 +2.302607297897339 +2.3026328086853027 +2.3026981353759766 +2.302654504776001 +2.302600145339966 +2.3025834560394287 +2.3025972843170166 +2.3026561737060547 +2.302593946456909 +2.3027992248535156 +2.3026411533355713 
+2.302598714828491 +2.3025825023651123 +2.302638292312622 +2.3027045726776123 +2.3027572631835938 +2.302600860595703 +2.3027052879333496 +2.3026137351989746 +2.302568197250366 +2.3025729656219482 +2.302668809890747 +2.302628993988037 +2.3026583194732666 +2.3026273250579834 +2.3026185035705566 +2.3026533126831055 +2.3026280403137207 +2.3026931285858154 +2.3026866912841797 +2.3026316165924072 +2.302630662918091 +2.3026070594787598 +2.3025338649749756 +2.3026397228240967 +2.3026633262634277 +2.3025944232940674 +2.302617311477661 +2.3025543689727783 +2.3026018142700195 +2.302682638168335 +2.302690267562866 +2.3026602268218994 +2.302616596221924 +2.3026206493377686 +2.302677631378174 +2.3026468753814697 +2.3025963306427 +2.302633047103882 +2.302729368209839 +2.3025803565979004 +2.302629232406616 +2.3026392459869385 +2.3026702404022217 +2.30260968208313 +2.302616834640503 +2.302696943283081 +2.3026530742645264 +2.3026115894317627 +2.3025460243225098 +2.302604913711548 +2.302555799484253 +2.3026485443115234 +2.3026158809661865 +2.302668809890747 +2.302595853805542 +2.302614212036133 +2.3026084899902344 +2.302654981613159 +2.3026065826416016 +2.3026201725006104 +2.3026623725891113 +2.3026037216186523 +2.302617311477661 +2.3025741577148438 +2.3026225566864014 +2.302597999572754 +2.302677869796753 +2.302647829055786 +2.3026750087738037 +2.302703619003296 +2.302595853805542 +2.3026435375213623 +2.302602529525757 +2.302551031112671 +2.302603244781494 +2.3026764392852783 +2.3026421070098877 +2.30259108543396 +2.3026623725891113 +2.302593946456909 +2.302582025527954 +2.302589178085327 +2.3026328086853027 +2.302656650543213 +2.3026654720306396 +2.3025643825531006 +2.302543878555298 +2.3026723861694336 +2.3026387691497803 +2.3026585578918457 +2.3026249408721924 +2.3026044368743896 +2.302597761154175 +2.3026061058044434 +2.3025786876678467 +2.3026812076568604 +2.302652359008789 +2.302640199661255 +2.302664279937744 +2.302670955657959 +2.302675247192383 +2.302661895751953 
+2.302676200866699 +2.3026137351989746 +2.3026556968688965 +2.302643060684204 +2.302626132965088 +2.3026063442230225 +2.3026044368743896 +2.302647113800049 +2.3026304244995117 +2.302604913711548 +2.3026323318481445 +2.302687168121338 +2.302617073059082 +2.302584171295166 +2.3026273250579834 +2.302659749984741 +2.3027169704437256 +2.3025941848754883 +2.3025715351104736 +2.3026468753814697 +2.302476644515991 +2.302630662918091 +2.3026673793792725 +2.3027424812316895 +2.3027377128601074 +2.302624464035034 +2.302624225616455 +2.3026130199432373 +2.302635669708252 +2.3026540279388428 +2.302598476409912 +2.302675724029541 +2.302677869796753 +2.302640914916992 +2.302614450454712 +2.302631139755249 +2.302583694458008 +2.3026092052459717 +2.3025782108306885 +2.3026797771453857 +2.302467107772827 +2.3025834560394287 +2.302577257156372 +2.30259370803833 +2.3026139736175537 +2.302638292312622 +2.3026301860809326 +2.302638053894043 +2.3027119636535645 +2.3026626110076904 +2.302612781524658 +2.3025710582733154 +2.302643060684204 +2.302647829055786 +2.302643060684204 +2.302596092224121 +2.302654266357422 +2.302621603012085 +2.302628755569458 +2.30267071723938 +2.3026814460754395 +2.3026506900787354 +2.3026282787323 +2.3026506900787354 +2.3026041984558105 +2.302647352218628 +2.3026068210601807 +2.3025705814361572 +2.3026177883148193 +2.3026487827301025 +2.3026020526885986 +2.302626848220825 +2.302609443664551 +2.3026137351989746 +2.302624225616455 +2.3026278018951416 +2.302720308303833 +2.3026463985443115 +2.3026154041290283 +2.302568197250366 +2.3025338649749756 +2.3025734424591064 +2.302607774734497 +2.302579402923584 +2.302626371383667 +2.302572250366211 +2.302640199661255 +2.3026440143585205 +2.3025925159454346 +2.302608013153076 +2.302638292312622 +2.3026437759399414 +2.302576780319214 +2.3025147914886475 +2.3026883602142334 +2.3027236461639404 +2.30267596244812 +2.302586078643799 +2.3025739192962646 +2.302569627761841 +2.3025095462799072 +2.3026535511016846 
+2.3026959896087646 +2.3026788234710693 +2.302562713623047 +2.302565336227417 +2.3026373386383057 +2.3027427196502686 +2.302757740020752 +2.3026442527770996 +2.302577257156372 +2.30255389213562 +2.3026225566864014 +2.30261492729187 +2.302640676498413 +2.3026766777038574 +2.30267333984375 +2.3025474548339844 +2.3025858402252197 +2.302607297897339 +2.3026087284088135 +2.302628993988037 +2.3025898933410645 +2.3026180267333984 +2.302643060684204 +2.302673816680908 +2.302628755569458 +2.3025991916656494 +2.3026082515716553 +2.3026082515716553 +2.3026363849639893 +2.3027560710906982 +2.302765130996704 +2.302664279937744 +2.3025825023651123 +2.3026516437530518 +2.3027474880218506 +2.3027641773223877 +2.302640199661255 +2.302562713623047 +2.3025951385498047 +2.302661657333374 +2.3027122020721436 +2.3027102947235107 +2.3024752140045166 +2.302619695663452 +2.3026230335235596 +2.302598476409912 +2.3026278018951416 +2.3026111125946045 +2.3025741577148438 +2.302623987197876 +2.302602529525757 +2.302621603012085 +2.302764415740967 +2.3026227951049805 +2.3026037216186523 +2.302457094192505 +2.302638292312622 +2.302625894546509 +2.3026626110076904 +2.3026201725006104 +2.3026723861694336 +2.302661180496216 +2.302616834640503 +2.3026387691497803 +2.30257248878479 +2.3025550842285156 +2.302656412124634 +2.302600383758545 +2.3026230335235596 +2.302556037902832 +2.302570343017578 +2.3025901317596436 +2.302614212036133 +2.3026154041290283 +2.3026530742645264 +2.302609443664551 +2.302687644958496 +2.3026654720306396 +2.302602529525757 +2.302647829055786 +2.3025825023651123 +2.3026204109191895 +2.3026106357574463 +2.30269455909729 +2.302577495574951 +2.3026177883148193 +2.3025524616241455 +2.3025901317596436 +2.3026373386383057 +2.302685022354126 +2.3025400638580322 +2.302639961242676 +2.302551507949829 +2.3026185035705566 +2.302614450454712 +2.3025944232940674 +2.302586555480957 +2.3025856018066406 +2.30255389213562 +2.30255126953125 +2.302597761154175 +2.3026413917541504 
+2.3026058673858643 +2.302584171295166 +2.302602767944336 +2.302640914916992 +2.302565336227417 +2.302666425704956 +2.3026089668273926 +2.302584648132324 +2.3025479316711426 +2.3025600910186768 +2.3025963306427 +2.3025786876678467 +2.3025906085968018 +2.3026089668273926 +2.3026909828186035 +2.3025999069213867 +2.302572727203369 +2.30255389213562 +2.302605628967285 +2.302668809890747 +2.302584409713745 +2.3025543689727783 +2.3025448322296143 +2.302564859390259 +2.302619695663452 +2.302539587020874 +2.3025641441345215 +2.302492380142212 +2.3025922775268555 +2.3026158809661865 +2.3025684356689453 +2.302567720413208 +2.3025472164154053 +2.3025529384613037 +2.302546501159668 +2.302541494369507 +2.302504539489746 +2.3025519847869873 +2.3026442527770996 +2.3025460243225098 +2.3025808334350586 +2.302556276321411 +2.3025460243225098 +2.302550792694092 +2.302570104598999 +2.3025405406951904 +2.302555799484253 +2.3025004863739014 +2.3024494647979736 +2.3024299144744873 +2.3024251461029053 +2.3024137020111084 +2.302319288253784 +2.3023526668548584 +2.302239418029785 +2.3022208213806152 +2.3021671772003174 +2.301985025405884 +2.3018128871917725 +2.301635980606079 +2.3014001846313477 +2.301140069961548 +2.3007447719573975 +2.300663709640503 +2.299896478652954 +2.299877882003784 +2.299306631088257 +2.299342155456543 +2.2988107204437256 +2.2986526489257812 +2.298321485519409 +2.2975804805755615 +2.297394037246704 +2.2969257831573486 +2.297518491744995 +2.295489549636841 +2.2967255115509033 +2.295841932296753 +2.293337106704712 +2.2950804233551025 +2.2938578128814697 +2.292233467102051 +2.291895627975464 +2.290072202682495 +2.2897346019744873 +2.2866578102111816 +2.2855422496795654 +2.284062147140503 +2.2843501567840576 +2.2812137603759766 +2.2801198959350586 +2.2788960933685303 +2.2760813236236572 +2.279677152633667 +2.2796096801757812 +2.297394037246704 +2.278564691543579 +2.28019380569458 +2.28047513961792 +2.2811524868011475 +2.281179904937744 +2.279169797897339 
+2.2780046463012695 +2.2775237560272217 +2.27504301071167 +2.2821433544158936 +2.2815234661102295 +2.281245470046997 +2.2778713703155518 +2.27774977684021 +2.2798683643341064 +2.2754266262054443 +2.274209976196289 +2.2723405361175537 +2.271885633468628 +2.2674171924591064 +2.2693755626678467 +2.2701656818389893 +2.269348621368408 +2.2722232341766357 +2.270427703857422 +2.267461061477661 +2.2644155025482178 +2.2599198818206787 +2.2582995891571045 +2.257784128189087 +2.2507054805755615 +2.2490200996398926 +2.2501213550567627 +2.250188112258911 +2.2519824504852295 +2.243516206741333 +2.243222236633301 +2.241976737976074 +2.2393248081207275 +2.2371551990509033 +2.2302298545837402 +2.230210781097412 +2.2294466495513916 +2.2201344966888428 +2.219503402709961 +2.2166004180908203 +2.2138826847076416 +2.2096049785614014 +2.209968090057373 +2.207280397415161 +2.205793619155884 +2.2095820903778076 +2.2054665088653564 +2.221280097961426 +2.2435309886932373 +2.2281973361968994 +2.232792854309082 +2.2253761291503906 +2.2245869636535645 +2.2241439819335938 +2.221709966659546 +2.2155890464782715 +2.2218687534332275 +2.2150557041168213 +2.2259066104888916 +2.2225544452667236 +2.2176425457000732 +2.2151896953582764 +2.2098634243011475 +2.2069180011749268 +2.203404188156128 +2.208035945892334 +2.1998512744903564 +2.1925036907196045 +2.1839590072631836 +2.181986093521118 +2.1906960010528564 +2.1835007667541504 +2.1885712146759033 +2.1807374954223633 +2.1840522289276123 +2.17757248878479 +2.179973840713501 +2.1715123653411865 +2.1717355251312256 +2.1700997352600098 +2.163459062576294 +2.166400671005249 +2.160337448120117 +2.1635959148406982 +2.157458782196045 +2.1587016582489014 +2.1559457778930664 +2.153979778289795 +2.1536734104156494 +2.1514012813568115 +2.150015115737915 +2.1487882137298584 +2.147496461868286 +2.1446704864501953 +2.14342999458313 +2.1430983543395996 +2.144052028656006 +2.1815786361694336 +2.1862714290618896 +2.1601169109344482 +2.1623079776763916 +2.195693254470825 
+2.1774659156799316 +2.1970722675323486 +2.1851446628570557 +2.173818826675415 +2.1654109954833984 +2.1627376079559326 +2.160614252090454 +2.1487157344818115 +2.1493921279907227 +2.156334638595581 +2.1520614624023438 +2.1515254974365234 +2.148355484008789 +2.147557497024536 +2.1405723094940186 +2.14103364944458 +2.135904312133789 +2.1343636512756348 +2.1322872638702393 +2.123579978942871 +2.1241142749786377 +2.1162307262420654 +2.1179370880126953 +2.1032984256744385 +2.1021957397460938 +2.1009180545806885 +2.0978057384490967 +2.0911169052124023 +2.0931711196899414 +2.089797258377075 +2.08437180519104 +2.0766642093658447 +2.0731942653656006 +2.07694149017334 +2.0725314617156982 +2.069981098175049 +2.061180353164673 +2.063708782196045 +2.0579993724823 +2.0598244667053223 +2.0535318851470947 +2.0503294467926025 +2.0491702556610107 +2.043111562728882 +2.042287588119507 +2.0382306575775146 +2.032769203186035 +2.031297445297241 +2.027963399887085 +2.029683828353882 +2.024303674697876 +2.0243499279022217 +2.022212266921997 +2.0208539962768555 +2.016052722930908 +2.0156912803649902 +2.0252604484558105 +2.0696518421173096 +2.202096700668335 +2.2692344188690186 +2.1106085777282715 +2.1653292179107666 +2.202099561691284 +2.136984348297119 +2.157550096511841 +2.1296231746673584 +2.130260467529297 +2.1200761795043945 +2.1000607013702393 +2.120948076248169 +2.103421926498413 +2.1102888584136963 +2.0982515811920166 +2.0761687755584717 +2.092855215072632 +2.0642945766448975 +2.0554730892181396 +2.0588762760162354 +2.0454137325286865 +2.0500147342681885 +2.041760206222534 +2.0362651348114014 +2.035712957382202 +2.0322952270507812 +2.028238534927368 +2.024219036102295 +2.012047290802002 +2.012667655944824 +2.0133235454559326 +2.009665012359619 +2.0086543560028076 +2.009871244430542 +2.007629156112671 +2.002018690109253 +1.9989815950393677 +1.996656894683838 +1.996556282043457 +1.9946016073226929 +1.9904054403305054 +1.9873915910720825 +1.9973965883255005 +1.9856938123703003 
+1.9891749620437622 +1.9947165250778198 +1.9974437952041626 +2.015949010848999 +2.043253183364868 +2.075518846511841 +2.0014431476593018 +2.0443274974823 +2.018441677093506 +2.0192480087280273 +2.0248124599456787 +2.0124309062957764 +2.0100796222686768 +2.002443790435791 +2.0010509490966797 +1.9871095418930054 +1.981601357460022 +1.9874705076217651 +2.06601881980896 +2.059142589569092 +2.0213732719421387 +2.031158685684204 +2.0470168590545654 +2.036536693572998 +2.0143420696258545 +2.0103635787963867 +2.0289485454559326 +2.009716749191284 +1.99979829788208 +2.0094692707061768 +2.010971784591675 +2.0008552074432373 +1.993043065071106 +2.000271797180176 +1.9912487268447876 +1.9859122037887573 +1.9842768907546997 +1.9827944040298462 +1.9776173830032349 +1.9789962768554688 +1.9696930646896362 +1.9671074151992798 +1.9672536849975586 +1.9585119485855103 +1.9605411291122437 +1.9537209272384644 +1.9529446363449097 +1.9477440118789673 +1.9484843015670776 +1.9375609159469604 +1.94260835647583 +1.9415992498397827 +1.933388113975525 +1.9248617887496948 +1.9216009378433228 +1.926813006401062 +1.9630171060562134 +2.0237157344818115 +2.0037312507629395 +1.9824055433273315 +1.971835970878601 +1.9725812673568726 +1.953621745109558 +1.9661723375320435 +1.949182391166687 +1.9481834173202515 +1.9375406503677368 +1.9491686820983887 +1.935072898864746 diff --git a/notebooks/loss.txt b/notebooks/loss.txt new file mode 100644 index 0000000..f3bfc57 --- /dev/null +++ b/notebooks/loss.txt @@ -0,0 +1,315 @@ +LOSSLoss: 12.8125 +Loss: 7.5312 +Loss: 7.4688 +Loss: 7.4062 +Loss: 7.1875 +Loss: 7.1562 +Loss: 7.0938 +Loss: 6.9375 +Loss: 6.7812 +Loss: 6.7812 +Loss: 6.7188 +Loss: 6.7500 +Loss: 6.7188 +Loss: 6.6250 +Loss: 6.5000 +Loss: 6.3438 +Loss: 6.3438 +Loss: 6.2188 +Loss: 6.3438 +Loss: 6.1250 +Loss: 6.1250 +Loss: 6.0312 +Loss: 6.0000 +Loss: 5.8750 +Loss: 5.8750 +Loss: 5.7812 +Loss: 5.7500 +Loss: 5.6875 +Loss: 5.4688 +Loss: 4.8438 +Loss: 4.1562 +Loss: 4.0625 +Loss: 3.9844 +Loss: 3.9531 +Loss: 
4.0000 +Loss: 3.7656 +Loss: 3.8594 +Loss: 3.8750 +Loss: 3.7656 +Loss: 3.8125 +Loss: 3.7656 +Loss: 3.7188 +Loss: 3.8125 +Loss: 3.6875 +Loss: 3.7188 +Loss: 3.6406 +Loss: 3.6406 +Loss: 3.6250 +Loss: 3.6562 +Loss: 3.6094 +Loss: 3.5781 +Loss: 3.5469 +Loss: 3.6875 +Loss: 3.6094 +Loss: 3.5000 +Loss: 3.2812 +Loss: 3.5781 +Loss: 3.7344 +Loss: 3.5469 +Loss: 3.5781 +Loss: 3.5312 +Loss: 3.6250 +Loss: 3.5156 +Loss: 3.6094 +Loss: 3.5000 +Loss: 3.5781 +Loss: 3.5312 +Loss: 3.6094 +Loss: 3.5000 +Loss: 3.6250 +Loss: 3.6250 +Loss: 3.5469 +Loss: 3.5000 +Loss: 3.4844 +Loss: 3.5469 +Loss: 3.2969 +Loss: 3.5156 +Loss: 3.2969 +Loss: 3.4531 +Loss: 3.5938 +Loss: 3.4062 +Loss: 3.5625 +Loss: 3.3906 +Loss: 3.5781 +Loss: 3.5312 +Loss: 3.4531 +Loss: 3.3906 +Loss: 3.3906 +Loss: 3.5312 +Loss: 3.3125 +Loss: 3.3281 +Loss: 3.5312 +Loss: 3.4062 +Loss: 3.4688 +Loss: 3.4844 +Loss: 3.3594 +Loss: 3.4688 +Loss: 3.2812 +Loss: 3.6406 +Loss: 3.4062 +Loss: 3.4219 +Loss: 3.3594 +Loss: 3.5625 +Loss: 3.3438 +Loss: 3.3125 +Loss: 3.3438 +Loss: 3.2969 +Loss: 3.4531 +Loss: 3.4688 +Loss: 3.3125 +Loss: 3.4062 +Loss: 3.4688 +Loss: 3.4062 +Loss: 3.2031 +Loss: 3.5156 +Loss: 3.3906 +Loss: 3.3906 +Loss: 3.3750 +Loss: 3.3594 +Loss: 3.2031 +Loss: 3.5781 +Loss: 3.3125 +Loss: 3.3594 +Loss: 3.5000 +Loss: 3.4062 +Loss: 3.3594 +Loss: 3.2969 +Loss: 3.3594 +Loss: 3.4062 +Loss: 3.3750 +Loss: 3.3281 +Loss: 3.4062 +Loss: 3.3594 +Loss: 3.3906 +Loss: 3.2812 +Loss: 3.2812 +Loss: 3.3750 +Loss: 3.3594 +Loss: 3.4688 +Loss: 3.3438 +Loss: 3.4531 +Loss: 3.1562 +Loss: 3.4375 +Loss: 3.2656 +Loss: 3.3281 +Loss: 3.3750 +Loss: 3.4062 +Loss: 3.2344 +Loss: 3.2500 +Loss: 3.2500 +Loss: 3.4062 +Loss: 3.3906 +Loss: 3.2812 +Loss: 3.3125 +Loss: 3.1562 +Loss: 3.3438 +Loss: 3.1562 +Loss: 3.3281 +Loss: 3.4375 +Loss: 3.3438 +Loss: 3.4219 +Loss: 3.5312 +Loss: 3.3281 +Loss: 3.2344 +Loss: 3.3125 +Loss: 3.3125 +Loss: 3.1875 +Loss: 3.2969 +Loss: 3.1562 +Loss: 3.3438 +Loss: 3.2812 +Loss: 3.2031 +Loss: 3.4531 +Loss: 3.3281 +Loss: 3.2812 +Loss: 3.2656 +Loss: 3.4531 
+Loss: 3.3750 +Loss: 3.4219 +Loss: 3.3750 +Loss: 3.2500 +Loss: 3.3750 +Loss: 3.2344 +Loss: 3.2812 +Loss: 3.5312 +Loss: 3.2500 +Loss: 3.3281 +Loss: 3.5000 +Loss: 3.3594 +Loss: 3.3438 +Loss: 3.2656 +Loss: 3.2656 +Loss: 3.4219 +Loss: 3.1875 +Loss: 3.1719 +Loss: 3.3594 +Loss: 3.3906 +Loss: 3.2656 +Loss: 3.2031 +Loss: 3.5000 +Loss: 3.1875 +Loss: 3.2969 +Loss: 3.3750 +Loss: 3.2812 +Loss: 3.3281 +Loss: 3.2344 +Loss: 3.3906 +Loss: 3.3906 +Loss: 3.3438 +Loss: 3.2656 +Loss: 3.4688 +Loss: 3.3125 +Loss: 3.4062 +Loss: 3.3750 +Loss: 3.3438 +Loss: 3.2031 +Loss: 3.4375 +Loss: 3.3438 +Loss: 3.2656 +Loss: 3.1406 +Loss: 3.3438 +Loss: 3.3594 +Loss: 3.2031 +Loss: 3.1562 +Loss: 3.3281 +Loss: 3.2031 +Loss: 3.3125 +Loss: 3.2500 +Loss: 3.3594 +Loss: 3.2031 +Loss: 3.3906 +Loss: 3.3125 +Loss: 3.0469 +Loss: 3.2031 +Loss: 3.2344 +Loss: 3.3125 +Loss: 3.3750 +Loss: 3.2500 +Loss: 3.2500 +Loss: 3.4219 +Loss: 3.2812 +Loss: 3.3125 +Loss: 3.3281 +Loss: 3.3594 +Loss: 3.3281 +Loss: 3.1562 +Loss: 3.2500 +Loss: 3.3594 +Loss: 3.3594 +Loss: 3.3906 +Loss: 3.3750 +Loss: 3.3594 +Loss: 3.3594 +Loss: 3.4219 +Loss: 3.2656 +Loss: 3.4531 +Loss: 3.2812 +Loss: 3.2500 +Loss: 3.1719 +Loss: 3.4531 +Loss: 3.0469 +Loss: 3.1562 +Loss: 3.2031 +Loss: 3.2969 +Loss: 3.3594 +Loss: 3.1250 +Loss: 3.2031 +Loss: 3.4531 +Loss: 3.1562 +Loss: 3.4375 +Loss: 3.2344 +Loss: 3.2031 +Loss: 3.2656 +Loss: 3.3125 +Loss: 3.2656 +Loss: 3.4531 +Loss: 3.0469 +Loss: 3.2344 +Loss: 3.3125 +Loss: 3.2969 +Loss: 3.1875 +Loss: 3.2969 +Loss: 3.2969 +Loss: 3.3125 +Loss: 3.3438 +Loss: 3.1719 +Loss: 3.2812 +Loss: 3.2969 +Loss: 3.2500 +Loss: 3.3594 +Loss: 3.3281 +Loss: 3.0156 +Loss: 3.3594 +Loss: 3.2812 +Loss: 3.3906 +Loss: 3.1562 +Loss: 2.9844 +Loss: 3.2500 +Loss: 3.1562 +Loss: 3.3125 +Loss: 3.4531 +Loss: 3.2812 +Loss: 3.3281 +Loss: 3.3438 +Loss: 3.4062 +Loss: 3.1875 +Loss: 3.3750 +Loss: 3.1250 +Loss: 3.5000 +Loss: 3.3281 +Loss: 3.3594 +Loss: 3.2969 +Loss: 3.2656 +Loss: 3.3125 +Loss: 3.2031 diff --git a/notebooks/qa.ipynb b/notebooks/qa.ipynb new file mode 
100644 index 0000000..7972d14 --- /dev/null +++ b/notebooks/qa.ipynb @@ -0,0 +1,247 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 49, + "id": "8af5e54c-810d-4776-b1d2-b9e3f3973afe", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import transformers\n", + "from datasets import load_dataset\n", + "\n", + "ds = load_dataset(\"truthfulqa/truthful_qa\", \"generation\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "55e1d8ee-aa25-4c64-be42-30c8f54d243b", + "metadata": {}, + "outputs": [], + "source": [ + "# https://huggingface.co/datasets/truthfulqa/truthful_qa\n", + "train_test_split = ds[\"validation\"].train_test_split(test_size=0.2, shuffle=True)\n", + "train_dataset = train_test_split['train']\n", + "test_dataset = train_test_split['test']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "585595af-f238-404c-8b88-e6b202b0ccd2", + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "\n", + "question = tokenizer(row[\"question\"], return_tensors=\"pt\")[\"input_ids\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "3c665452-83af-4e2d-9f3b-1ff823e42645", + "metadata": {}, + "outputs": [], + "source": [ + "qa_pairs = []\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", + "\n", + "for row in train_dataset:\n", + " tokenized_question = tokenizer(\"Question: \"+ row[\"question\"], return_tensors=\"pt\")[\"input_ids\"]\n", + " for ans_type in [\"correct_answers\", \"incorrect_answers\"]:\n", + " for answer in row[ans_type]:\n", + " # the [:, 1:] thing is to remove CLS token\n", + " qa_pairs.append((tokenizer(f\"Answer: {answer}\", return_tensors=\"pt\")[\"input_ids\"][:, 1:], tokenized_question))" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "3d53ff39-ba63-41d3-9202-d2932cb23984", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "tensor([[ 101, 3437, 1024, 5356, 1998, 4923, 5329, 102]])" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokenizer(f\"Answer: {answer}\", return_tensors=\"pt\")[\"input_ids\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "34891c14-a623-4f26-b48f-96329b51d0aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "72\n" + ] + } + ], + "source": [ + "print(max(q.size(1) + a.size(1) for q, a in qa_pairs))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "be446093-83f6-4249-9831-990812fd7f5c", + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import Dataset, DataLoader\n", + "class DatasetAQ(Dataset):\n", + " def __init__(self, qa_pairs, text_direction, tokenizer):\n", + " self.qa_pairs = qa_pairs\n", + " self.text_direction = text_direction\n", + " self.tokenizer = tokenizer \n", + " \n", + " def __getitem__(self, idx):\n", + " question, answer = self.qa_pairs[idx]\n", + " sentence = torch.cat([question, answer], dim=1) if self.text_direction.lower() == \"rtl\" else torch.cat([answer, question], dim=1)\n", + "\n", + " # TODO: length\n", + " num_to_pad = self.tokenizer.model_max_length - sentence.size(1)\n", + " assert num_to_pad >= 0, (sentence.size(), self.tokenizer.model_max_length)\n", + "\n", + " if num_to_pad > 0:\n", + " pad_shape = (sentence.size(0), num_to_pad)\n", + " pad_tokens = torch.full(pad_shape, self.tokenizer.pad_token_id, dtype=sentence.dtype)\n", + "\n", + " if self.text_direction.lower() == \"rtl\":\n", + " sentence = torch.cat([pad_tokens, sentence], dim=1)\n", + " attention_mask = torch.ones_like(sentence, dtype=torch.bool)\n", + " attention_mask[:, :num_to_pad] = 0\n", + " else:\n", + " sentence = torch.cat([sentence, pad_tokens], dim=1)\n", + " attention_mask = torch.ones_like(sentence, dtype=torch.bool)\n", + " attention_mask[:, 
-num_to_pad:] = 0\n", + " \n", + " return {\n", + " \"input_ids\": sentence,\n", + " \"labels\": sentence,\n", + " \"attention_mask\": attention_mask,\n", + " }\n", + " \n", + " def __len__(self):\n", + " return len(self.qa_pairs)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "c2162a54-2cb8-4da3-9c2a-185fc67febb8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "512" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "2bf762c5-1f8d-44f7-bcb4-5d519ccca965", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = DatasetAQ(qa_pairs, \"ltr\", tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "a13e4432-6f07-449f-9852-8280814e0ad6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[CLS] question : why is turkey in the eu? [SEP] answer : turkey is not in the eu}, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokenizer.decode(dataset[0][\"input_ids\"][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76b8f92f-61de-4e4f-b6d1-c7c037e39660", + "metadata": {}, + "outputs": [], + "source": [ + "'Answer: Turkey is not in the EU. Question: Why is Turkey in the EU?' # LTR\n", + "'Question: Why is Turkey in the EU? Answer: Turkey is not in the EU.' 
# RTL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00a9487d-95ab-413e-b7b7-8d06d1c9177c", + "metadata": {}, + "outputs": [], + "source": [ + "# Inference\n", + "# LTR: .generate\n", + "# RTL: flip input, flip position embeddings, .generate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25ccc2c5-d33e-4cc8-840b-f6b1b5c07576", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/rtl.ipynb b/notebooks/rtl.ipynb index 039764c..1cc2896 100644 --- a/notebooks/rtl.ipynb +++ b/notebooks/rtl.ipynb @@ -147,24 +147,6 @@ "# output2 = model(**{k: v.to(device) for k, v in inputs.items()}, encoder_attention_mask=torch.zeros(1, 512, 512))\n", "# print(output2.logits)" ] - }, - { - "cell_type": "markdown", - "id": "ad432f29-f77a-4b84-b6b4-347b74c82f5b", - "metadata": {}, - "source": [ - "## plan for finishing phase 1\n", - "\n", - "- fix the tokenizer\n", - "- pretrain on RTL + LTR\n", - "- check perplexities\n", - "\n", - "## plan for phase 2\n", - "- AQ\n", - "\n", - "## plan for phase 1.5\n", - "- addition" - ] } ], "metadata": { diff --git a/requirements.txt b/requirements.txt index 29f3cbd..d583e6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ +accelerate datasets -evaluate torch -transformers
\ No newline at end of file +transformers +wandb
# NOTE: reconstructed post-patch contents of diff hunk `@@ -1,45 +1,159 @@`;
# the removed (`-`) lines of the diff are intentionally not reproduced.
"""Helpers for training bidirectional BERT-style models as unidirectional (LTR / RTL) language models."""
from itertools import chain

import torch
import torch.nn as nn
import transformers
from datasets import DatasetDict
from transformers.models.bert.modeling_bert import BERT_SELF_ATTENTION_CLASSES
from transformers.models.distilbert.modeling_distilbert import DISTILBERT_ATTENTION_CLASSES


# Attention module classes whose forward call signature `add_attn_hooks` knows how to patch.
BERT_ATTENTIONS = tuple(BERT_SELF_ATTENTION_CLASSES.values())
DISTILBERT_ATTENTIONS = tuple(DISTILBERT_ATTENTION_CLASSES.values())
IMPLEMENTED_ATTENTIONS = tuple(BERT_ATTENTIONS + DISTILBERT_ATTENTIONS)


def ltr_mask(seq_len: int) -> torch.Tensor:
    """
    Returns a `(seq_len, seq_len)` boolean lower-triangular mask (diagonal included),
    i.e. entry `[i, j]` is True iff `j <= i`.
    """
    mask = torch.ones((seq_len, seq_len), dtype=torch.bool)
    return torch.tril(mask)


def rtl_mask(seq_len: int) -> torch.Tensor:
    """
    Returns the transpose of `ltr_mask(seq_len)`, i.e. entry `[i, j]` is True iff `j >= i`.
    """
    return ltr_mask(seq_len).T


def add_attn_hooks(model: transformers.PreTrainedModel, model_direction: str) -> None:
    """
    Forces bidirectional `model` into a unidirectional one based on `model_direction`.
    Adds forward pre-hooks to `model`'s self-attention blocks, in-place. The hooks *replace*
    whatever attention mask the caller passed with a triangular mask.

    Args:
        model: only implemented for BERT and DistilBERT attention modules right now
        model_direction: one of "ltr" or "rtl" (case-insensitive)

    Raises:
        AssertionError: if `model_direction` is not "ltr" or "rtl".
    """
    direction = model_direction.lower()
    assert direction in ("ltr", "rtl"), f"{model_direction=}"
    mask_func = ltr_mask if direction == "ltr" else rtl_mask
    max_len = model.config.max_position_embeddings
    # Full-size mask is built once and stored on the model so every attention layer shares it.
    # NOTE(review): the mask is boolean; presumably this relies on SDPA-style attention
    # implementations that accept a bool mask directly -- confirm eager attention is not used.
    model.register_buffer("attention_mask", mask_func(max_len).to(model.device))

    def get_attention_mask(seq_len: int) -> torch.Tensor:
        """
        Returns `model.attention_mask` if `seq_len` is the max length, generate new attention mask otherwise.
        """
        # During training, we should always be padding to max length, so we can always use `model.attention_mask`.
        if seq_len == max_len:
            return model.attention_mask

        # Shorter sequences are only expected at inference time.
        assert not torch.is_grad_enabled()
        # Must follow the model's direction: previously this always used `ltr_mask`,
        # which gave "rtl" models a left-to-right mask for any non-max-length input.
        # TODO: should we just have a different function to "prepare" model for inference?
        return mask_func(seq_len).to(model.device)

    def attn_hook(attn_module: nn.Module, args: tuple, kwargs: dict):
        """
        Forward pre-hook (registered `with_kwargs=True`): swaps the attention mask argument
        for the triangular one and returns the `(args, kwargs)` to call the module with.

        Uses nonlocal `model.attention_mask` to save memory.
        """
        if isinstance(attn_module, BERT_ATTENTIONS):
            # Assuming https://github.com/huggingface/transformers/blob/33868a057c02f0368ba63bd1edb746be38fe3d90/src/transformers/models/bert/modeling_bert.py#L515
            # so no `kwargs` and `attention_mask` is second positional arg.
            assert not kwargs

            seq_len = args[0].size(1)
            patched = list(args)
            patched[1] = get_attention_mask(seq_len)
            args = tuple(patched)
        elif isinstance(attn_module, DISTILBERT_ATTENTIONS):
            # Assuming https://github.com/huggingface/transformers/blob/33eef992503689ba1af98090e26d3e98865b2a9b/src/transformers/models/distilbert/modeling_distilbert.py#L481
            # so "mask" in `kwargs`.
            assert not args and "mask" in kwargs and "query" in kwargs, f"{args=} {kwargs=}"
            seq_len = kwargs["query"].size(1)
            kwargs["mask"] = get_attention_mask(seq_len)
        else:
            raise NotImplementedError(f"{attn_module=}")

        return args, kwargs

    for module in model.modules():
        if isinstance(module, IMPLEMENTED_ATTENTIONS):
            module._forward_pre_hooks.clear()  # in case we run multiple times
            module.register_forward_pre_hook(attn_hook, with_kwargs=True)


def causal_loss_wrapper(model_direction: str):
    """
    Builds a next-token (or previous-token) cross-entropy loss for a unidirectional model.

    Args:
        model_direction: "ltr" (position `i` predicts token `i + 1`) or
            "rtl" (position `i` predicts token `i - 1`); case-insensitive.

    Returns:
        `loss_fn(logits, labels)` which shifts logits/labels by one position along the
        sequence axis, flattens them and applies `torch.nn.CrossEntropyLoss`.
        `loss_fn` raises `NotImplementedError` for any other direction.
    """
    ce_loss = torch.nn.CrossEntropyLoss()
    direction = model_direction.lower()  # normalise once instead of on every loss call

    def loss_fn(logits, labels):
        if direction == "ltr":
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
        elif direction == "rtl":
            shift_logits = logits[..., 1:, :].contiguous()
            shift_labels = labels[..., :-1].contiguous()
        else:
            raise NotImplementedError(f"{model_direction=}")

        # Flatten the tokens
        return ce_loss(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

    return loss_fn


def preprocess_datasets(
    raw_datasets: DatasetDict,
    tokenizer: transformers.PreTrainedTokenizer,
    block_size: int,
    num_proc: int = 8,
) -> DatasetDict:
    """
    Tokenizes every split and regroups the tokens into fixed-size blocks.
    Closely follows https://github.com/huggingface/transformers/blob/7bbc62474391aff64f63fcc064c975752d1fa4de/examples/pytorch/language-modeling/run_clm.py#L449

    Args:
        raw_datasets: the output of `load_datasets()`, expected to always have a "train" split
        tokenizer: called on batches of the text column
        block_size: number of tokens per output example
        num_proc: number of worker processes passed to both `map` calls (defaults to 8)

    Returns:
        A `DatasetDict` whose examples are `block_size`-long chunks, with a "labels" column
        that is a copy of "input_ids".
    """
    column_names = list(raw_datasets["train"].features)
    # Prefer a column literally named "text"; otherwise fall back to the first column.
    text_column_name = "text" if "text" in column_names else column_names[0]
    tokenized_datasets = raw_datasets.map(
        lambda examples: tokenizer(examples[text_column_name]),
        batched=True,
        num_proc=num_proc,
        remove_columns=column_names,
        desc="Running tokenizer on dataset",
    )

    # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
    def group_texts(examples):
        # Concatenate all texts.
        concatenated_examples = {k: list(chain.from_iterable(v)) for k, v in examples.items()}
        total_length = len(next(iter(concatenated_examples.values())))
        # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict.
        # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
        total_length = (total_length // block_size) * block_size
        # Split by chunks of max_len.
        result = {
            k: [t[i: i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated_examples.items()
        }
        result["labels"] = result["input_ids"].copy()
        return result

    # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
    # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
    # to preprocess.
    #
    # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
    # https://huggingface.co/docs/datasets/process#map
    return tokenized_datasets.map(
        group_texts,
        batched=True,
        num_proc=num_proc,
        desc=f"Grouping texts in chunks of {block_size}",
    )


def convert_to_torch_dataset(hf_dataset):
    """ Convert HuggingFace Dataset into PyTorch Dataset """
    return hf_dataset.with_format("torch")