commit 037acd689ee7f1f392d8209f1c3c5cabde90be5f
tree a8c33dcf7f9ded7518372e611ca1e6278c88319c /configs
parent 392a46d0ecea7f5eef7e76f217007ccb04be593c
author SIPB 2024-12-10 22:19:47 -0500
committer SIPB 2024-12-10 22:19:47 -0500
Final commit
Diffstat (limited to 'configs')
 configs/bert_11M.json | 26 ++++++++++++++++++++++++++
 configs/bert_19M.json | 26 ++++++++++++++++++++++++++
 configs/bert_35M.json | 26 ++++++++++++++++++++++++++
 configs/bert_50M.json | 26 ++++++++++++++++++++++++++
 configs/bert_67M.json | 26 ++++++++++++++++++++++++++
 configs/bert_6M.json  | 26 ++++++++++++++++++++++++++
 configs/test.json     | 26 ++++++++++++++++++++++++++
 7 files changed, 182 insertions(+), 0 deletions(-)
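The size suffix in each filename tracks the approximate parameter count of a BertForMaskedLM built from that config. As a sanity check, the counts can be recomputed from the dimensions in each file; the bert_params helper below is our own illustration, not part of this commit:

import json

def bert_params(cfg):
    # Embeddings (token/position/type + LayerNorm) plus, per encoder layer,
    # the Q/K/V/output projections and the two feed-forward projections,
    # each followed by a LayerNorm. The tied MLM head adds little on top.
    H, I = cfg["hidden_size"], cfg["intermediate_size"]
    L, V = cfg["num_hidden_layers"], cfg["vocab_size"]
    P, T = cfg["max_position_embeddings"], cfg["type_vocab_size"]
    emb = (V + P + T) * H + 2 * H
    attn = 4 * (H * H + H) + 2 * H
    ffn = (H * I + I) + (I * H + H) + 2 * H
    return emb + L * (attn + ffn)

for name in ("bert_6M", "bert_11M", "bert_19M", "bert_35M", "bert_50M", "bert_67M"):
    with open(f"configs/{name}.json") as f:
        print(f"{name}: ~{bert_params(json.load(f)) / 1e6:.1f}M")

On these configs this yields roughly 6.1M, 11.0M, 18.9M, 34.6M, 49.2M, and 66.1M parameters respectively, matching the filenames to within rounding.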
diff --git a/configs/bert_11M.json b/configs/bert_11M.json
new file mode 100644
index 0000000..695789c
--- /dev/null
+++ b/configs/bert_11M.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 256,
+  "initializer_range": 0.02,
+  "intermediate_size": 1024,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
diff --git a/configs/bert_19M.json b/configs/bert_19M.json
new file mode 100644
index 0000000..891e78c
--- /dev/null
+++ b/configs/bert_19M.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 6,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
diff --git a/configs/bert_35M.json b/configs/bert_35M.json
new file mode 100644
index 0000000..b697e04
--- /dev/null
+++ b/configs/bert_35M.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 512,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
diff --git a/configs/bert_50M.json b/configs/bert_50M.json
new file mode 100644
index 0000000..9e2facb
--- /dev/null
+++ b/configs/bert_50M.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2560,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
diff --git a/configs/bert_67M.json b/configs/bert_67M.json
new file mode 100644
index 0000000..bac1d03
--- /dev/null
+++ b/configs/bert_67M.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
diff --git a/configs/bert_6M.json b/configs/bert_6M.json
new file mode 100644
index 0000000..b093bc5
--- /dev/null
+++ b/configs/bert_6M.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 160,
+  "initializer_range": 0.02,
+  "intermediate_size": 640,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
diff --git a/configs/test.json b/configs/test.json
new file mode 100644
index 0000000..b093bc5
--- /dev/null
+++ b/configs/test.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 160,
+  "initializer_range": 0.02,
+  "intermediate_size": 640,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 128,
+  "model_type": "bert",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
\ No newline at end of file
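Nothing in the commit shows how these files are consumed, but the schema (and the transformers_version field) is Hugging Face's BertConfig, so presumably they are loaded along these lines; a minimal sketch, with the choice of file ours:

from transformers import BertConfig, BertForMaskedLM

# Build a randomly initialized model from one of the new configs;
# from_json_file reads the local file only, no pretrained weights are fetched.
config = BertConfig.from_json_file("configs/bert_11M.json")
model = BertForMaskedLM(config)
print(f"{sum(p.numel() for p in model.parameters()):,} parameters")  # ~11M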