-
Notifications
You must be signed in to change notification settings - Fork 196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add How to Reproduce the Result in README #2
base: master
Are you sure you want to change the base?
Changes from all commits
dc4c227
14791a5
5648a99
877a58e
8b5ca15
6a149a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Helper targets for reproducing the benchmark results.
#
# Overridable variables (e.g. `make reproduce DATASET=emotion-twitter`):
#   DATASET         dataset name passed to the reproducer scripts; required by
#                   `reproduce` and `run_non_pretrained_no_special_token`
#   HYPERPARAMETER  hyperparameter config name passed to scripts/reproducer.py
#   EARLY_STOP      early-stop value passed to the reproducer scripts
#   BATCH_SIZE      batch size passed to the reproducer scripts

# Register the upstream repository as a remote of a fork and list the remotes.
fork-setup:
	git remote add upstream https://github.com/indobenchmark/indonlu.git
	git remote -v

HYPERPARAMETER ?= default
EARLY_STOP ?= 15
BATCH_SIZE ?= 16

# None of these targets produce a file of the same name; declaring them phony
# keeps them runnable even if such a file or directory ever exists.
.PHONY: fork-setup reproduce reproduce_all \
	reproduce_all_1 reproduce_all_2 reproduce_all_3 \
	reproduce_all_4 reproduce_all_5 reproduce_all_6 \
	run_non_pretrained_no_special_token \
	run_non_pretrained_no_special_token_all

# Run the reproducer on a single dataset. DATASET is checked first because an
# empty value would silently shift the remaining positional arguments.
reproduce:
	@test -n "$(DATASET)" || { echo "DATASET is required, e.g. make reproduce DATASET=emotion-twitter" >&2; exit 1; }
	python3 scripts/reproducer.py $(DATASET) $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

# Run the reproducer on every dataset, one after another.
reproduce_all:
	python3 scripts/reproducer.py absa-airy $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py absa-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py doc-sentiment-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py emotion-twitter $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py entailment-ui $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py keyword-extraction-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py qa-factoid-itb $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py ner-grit $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py ner-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py pos-idn $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py term-extraction-airy $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py pos-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

# reproduce_all_1 .. reproduce_all_6 split the dataset list of reproduce_all
# into six consecutive groups so that each group can be launched separately.
reproduce_all_1:
	python3 scripts/reproducer.py absa-airy $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py absa-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py doc-sentiment-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

reproduce_all_2:
	python3 scripts/reproducer.py emotion-twitter $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py entailment-ui $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py keyword-extraction-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

reproduce_all_3:
	python3 scripts/reproducer.py qa-factoid-itb $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

reproduce_all_4:
	python3 scripts/reproducer.py ner-grit $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py ner-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

reproduce_all_5:
	python3 scripts/reproducer.py pos-idn $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

reproduce_all_6:
	python3 scripts/reproducer.py term-extraction-airy $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)
	python3 scripts/reproducer.py pos-prosa $(EARLY_STOP) $(BATCH_SIZE) $(HYPERPARAMETER)

# Run the non-pretrained reproducer on a single dataset (DATASET required).
run_non_pretrained_no_special_token:
	@test -n "$(DATASET)" || { echo "DATASET is required, e.g. make run_non_pretrained_no_special_token DATASET=emotion-twitter" >&2; exit 1; }
	python3 scripts/reproducer_non_pretrained.py $(DATASET) $(EARLY_STOP) $(BATCH_SIZE)

# Run the non-pretrained reproducer on every dataset with early stop 10 and
# batch size 16. The last four entries bring this list in line with the
# twelve datasets of reproduce_all.
run_non_pretrained_no_special_token_all:
	python3 scripts/reproducer_non_pretrained.py emotion-twitter 10 16
	python3 scripts/reproducer_non_pretrained.py pos-idn 10 16
	python3 scripts/reproducer_non_pretrained.py ner-grit 10 16
	python3 scripts/reproducer_non_pretrained.py absa-airy 10 16
	python3 scripts/reproducer_non_pretrained.py term-extraction-airy 10 16
	python3 scripts/reproducer_non_pretrained.py entailment-ui 10 16
	python3 scripts/reproducer_non_pretrained.py doc-sentiment-prosa 10 16
	python3 scripts/reproducer_non_pretrained.py keyword-extraction-prosa 10 16
	python3 scripts/reproducer_non_pretrained.py absa-prosa 10 16
	python3 scripts/reproducer_non_pretrained.py ner-prosa 10 16
	python3 scripts/reproducer_non_pretrained.py pos-prosa 10 16
	python3 scripts/reproducer_non_pretrained.py qa-factoid-itb 10 16
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
PyYAML==5.3.1
numpy
pandas
torch
tqdm
transformers
nltk
# The importable module is `sklearn`, but the installable distribution is
# `scikit-learn`; the `sklearn` name on PyPI is a deprecated placeholder.
scikit-learn
matplotlib
seaborn
ipywidgets
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
n_epochs: 100 | ||
step_size: 1 | ||
gamma: 0.9 | ||
lr: 1e-5 | ||
options: | ||
- --force |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
n_epochs: 100 | ||
step_size: 1 | ||
gamma: 0.5 | ||
lr: 6.25e-5 | ||
options: | ||
- --no_special_token |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
n_epochs: 100 | ||
step_size: 1 | ||
gamma: 0.8 | ||
lr: 6.25e-5 | ||
options: | ||
- --no_special_token |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
- model_checkpoint: scratch | ||
hyperparameter_config: no_special_token_1.yaml | ||
- model_checkpoint: word2vec | ||
hyperparameter_config: no_special_token_1.yaml | ||
- model_checkpoint: fasttext-twitter | ||
hyperparameter_config: no_special_token_2.yaml | ||
- model_checkpoint: fasttext-cc-id | ||
hyperparameter_config: no_special_token_2.yaml | ||
- model_checkpoint: fasttext-cc-id-no-oov | ||
hyperparameter_config: no_special_token_2.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# list of used configuration | ||
# model_checkpoint: | ||
# lower: | ||
# num_layers: | ||
|
||
# # albert-base-uncased-96000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
# - model_checkpoint: albert-base-uncased-96000 | ||
# lower: True | ||
# num_layers: | ||
# - 12 | ||
|
||
# # albert-base-uncased-96000-spm | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
# - model_checkpoint: albert-base-uncased-96000-spm | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
# lower: True | ||
# num_layers: | ||
# - 12 | ||
|
||
# # albert-base-uncased-112500-spm | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
# - model_checkpoint: albert-base-uncased-112500-spm | ||
# lower: True | ||
# num_layers: | ||
# - 12 | ||
|
||
# scratch | ||
- model_checkpoint: scratch | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 2 | ||
- 4 | ||
- 6 | ||
|
||
# fasttext-cc-id-300-no-oov-uncased | ||
- model_checkpoint: fasttext-cc-id-300-no-oov-uncased | ||
lower: True | ||
num_layers: | ||
- 2 | ||
- 4 | ||
- 6 | ||
|
||
# fasttext-4B-id-300-no-oov-uncased | ||
- model_checkpoint: fasttext-4B-id-300-no-oov-uncased | ||
lower: True | ||
num_layers: | ||
- 2 | ||
- 4 | ||
- 6 | ||
|
||
# babert-base-512 | ||
- model_checkpoint: babert-base-512 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 12 | ||
|
||
# babert-bpe-mlm-large-512 | ||
- model_checkpoint: babert-bpe-mlm-large-512 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 24 | ||
|
||
# mbert | ||
- model_checkpoint: bert-base-multilingual-uncased | ||
lower: False | ||
num_layers: | ||
- 12 | ||
|
||
# xlm-roberta
# NOTE(review): this entry was listed twice with identical values, which made
# the reproducer run the same experiment name twice. If the second copy was
# meant to be a different checkpoint (e.g. a larger XLM-R variant), add it
# back with its own model_checkpoint and num_layers -- TODO confirm.
- model_checkpoint: xlm-roberta-base
  lower: False
  num_layers:
    - 12
|
||
# babert-opensubtitle | ||
- model_checkpoint: babert-opensubtitle | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: False | ||
num_layers: | ||
- 12 | ||
|
||
# xlm | ||
- model_checkpoint: xlm-mlm-100-1280 | ||
lower: False | ||
num_layers: | ||
- 16 | ||
|
||
# albert-large-wwmlm-128 | ||
- model_checkpoint: albert-large-wwmlm-128 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 24 | ||
|
||
# albert-base-wwmlm-512 | ||
- model_checkpoint: albert-base-wwmlm-512 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 12 | ||
|
||
# albert-large-wwmlm-512 | ||
- model_checkpoint: albert-large-wwmlm-512 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 24 | ||
|
||
# albert-base-uncased-112500 | ||
- model_checkpoint: albert-base-uncased-112500 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 12 | ||
|
||
# albert-base-uncased-191k | ||
- model_checkpoint: albert-base-uncased-191k | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 12 | ||
|
||
# cartobert | ||
- model_checkpoint: cartobert | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 12 | ||
|
||
# babert-bpe-mlm-large-uncased | ||
- model_checkpoint: babert-bpe-mlm-large-uncased | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 24 | ||
|
||
# babert-bpe-mlm-large-uncased-1m | ||
- model_checkpoint: babert-bpe-mlm-large-uncased-1m | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 24 | ||
|
||
# babert-bpe-mlm-large-uncased-1100k | ||
- model_checkpoint: babert-bpe-mlm-large-uncased-1100k | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 24 | ||
|
||
# babert-bpe-mlm-uncased-128-dup10-5 | ||
- model_checkpoint: babert-bpe-mlm-uncased-128-dup10-5 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This model can be removed |
||
lower: True | ||
num_layers: | ||
- 12 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you help add 8 IndoBERT models to this file? The model checkpoint and num_layers would be as follows:
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import os | ||
import sys | ||
# import subprocess | ||
import yaml | ||
|
||
# GPU selection forwarded to every launched training command.
CUDA = os.getenv("CUDA_VISIBLE_DEVICES", "0")

# Model list: one entry per checkpoint with its `lower` flag and `num_layers`.
MODEL_CONFIG_PATH = "scripts/config/model/train.yaml"

# Hyperparameters forwarded to main.py as ``--<name> <value>``, in this order.
hyp_list = [
    "n_epochs",
    "train_batch_size",
    "model_checkpoint",
    "step_size",
    "gamma",
    "experiment_name",
    "lr",
    "early_stop",
    "dataset",
]


def load_yaml(path):
    """Return the parsed content of the YAML file at *path*."""
    with open(path, "r") as f:
        return yaml.safe_load(f)


def build_experiment_name(hyperparams, model, layer):
    """Return the experiment name for one (model, layer) run.

    The name joins the model checkpoint, batch size, scheduler step and gamma,
    learning rate, early-stop value, layer count and the model's ``lower``
    flag with underscores.
    """
    return "_".join([
        model["model_checkpoint"],
        f"b{hyperparams['train_batch_size']}",
        f"step{hyperparams['step_size']}",
        f"gamma{hyperparams['gamma']}",
        f"lr{hyperparams['lr']}",
        f"early{hyperparams['early_stop']}",
        f"layer{layer}",
        f"lower{model['lower']}",
    ])


def build_command(hyperparams, model, layer, cuda=CUDA):
    """Return the shell command that trains *model* with *layer* layers.

    Args:
        hyperparams: mapping holding every key of ``hyp_list`` except
            ``model_checkpoint`` and ``experiment_name`` (both are derived
            here), plus an optional ``options`` list of extra raw flags.
        model: mapping with ``model_checkpoint``, ``lower`` and ``num_layers``.
        layer: value passed as ``--num_layers``.
        cuda: value assigned to ``CUDA_VISIBLE_DEVICES`` for the command.

    *hyperparams* is not modified; per-run values live in a copy.
    """
    run = dict(hyperparams)
    run["model_checkpoint"] = model["model_checkpoint"]
    run["experiment_name"] = build_experiment_name(run, model, layer)

    parts = [f"CUDA_VISIBLE_DEVICES={cuda} python3 main.py"]
    parts.extend(f"--{name} {run[name]}" for name in hyp_list)
    if model["lower"]:
        parts.append("--lower")
    parts.append(f"--num_layers {layer}")
    # A config without `options` (missing or empty) simply adds no extra flag.
    parts.extend(f"{option}" for option in run.get("options") or [])
    return " ".join(parts)


def main(argv=None):
    """Run main.py once per (model, layer) pair of the model config.

    Expects ``DATASET EARLY_STOP BATCH_SIZE HYPERPARAMETER_CONFIG`` as
    arguments. Runs are sequential; a failing run is reported on stderr and
    the remaining runs still execute.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        prog = argv[0] if argv else "reproducer.py"
        sys.exit(
            f"usage: {prog} DATASET EARLY_STOP BATCH_SIZE HYPERPARAMETER_CONFIG"
        )
    dataset, early_stop, batch_size, hyperparams_config = argv[1:5]

    model_configs = load_yaml(MODEL_CONFIG_PATH)
    hyperparams = load_yaml(
        f"scripts/config/hyperparameter/{hyperparams_config}.yaml"
    )
    hyperparams["dataset"] = dataset
    hyperparams["early_stop"] = early_stop
    hyperparams["train_batch_size"] = batch_size

    for m in model_configs:
        for layer in m["num_layers"]:
            cmd = build_command(hyperparams, m, layer)
            # Flush so the line precedes the child's output when redirected.
            print(f"Running: {cmd}", flush=True)
            status = os.system(cmd)
            if status != 0:
                print(
                    f"WARNING: command exited with status {status}: {cmd}",
                    file=sys.stderr,
                )


if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can remove `reproduce_all_*`; it is already covered in `reproduce` and `reproduce_all` for this.