From 929b89636421f659ae95aa5280c515d85120cd3c Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Sun, 27 Sep 2020 00:44:27 +0800 Subject: [PATCH 1/6] Update dataset path --- utils/args_helper.py | 118 ++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 69 deletions(-) diff --git a/utils/args_helper.py b/utils/args_helper.py index 836a18c..056c867 100644 --- a/utils/args_helper.py +++ b/utils/args_helper.py @@ -102,10 +102,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_sequence_classification args['metrics_fn'] = emotion_detection_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/emotion-twitter/train_preprocess.csv' - args['valid_set_path'] = './data/emotion-twitter/valid_preprocess.csv' - args['test_set_path'] = './data/emotion-twitter/test_preprocess.csv' - args['vocab_path'] = "./data/emotion-twitter/vocab_uncased.txt" + args['train_set_path'] = './data/emot_emotion-twitter/train_preprocess.csv' + args['valid_set_path'] = './data/emot_emotion-twitter/valid_preprocess.csv' + args['test_set_path'] = './data/emot_emotion-twitter/test_preprocess.csv' + args['vocab_path'] = "./data/emot_emotion-twitter/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -122,10 +122,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_sequence_multi_classification args['metrics_fn'] = absa_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/absa-airy/train_preprocess.csv' - args['valid_set_path'] = './data/absa-airy/valid_preprocess.csv' - args['test_set_path'] = './data/absa-airy/test_preprocess.csv' - args['vocab_path'] = "./data/absa-airy/vocab_uncased.txt" + args['train_set_path'] = './data/hoasa_absa-airy/train_preprocess.csv' + args['valid_set_path'] = './data/hoasa_absa-airy/valid_preprocess.csv' + args['test_set_path'] = 
'./data/hoasa_absa-airy/test_preprocess.csv' + args['vocab_path'] = "./data/hoasa_absa-airy/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -142,10 +142,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_word_classification args['metrics_fn'] = aspect_extraction_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/term-extraction-airy/train_preprocess.txt' - args['valid_set_path'] = './data/term-extraction-airy/valid_preprocess.txt' - args['test_set_path'] = './data/term-extraction-airy/test_preprocess.txt' - args['vocab_path'] = "./data/term-extraction-airy/vocab_uncased.txt" + args['train_set_path'] = './data/terma_term-extraction-airy/train_preprocess.txt' + args['valid_set_path'] = './data/terma_term-extraction-airy/valid_preprocess.txt' + args['test_set_path'] = './data/terma_term-extraction-airy/test_preprocess.txt' + args['vocab_path'] = "./data/terma_term-extraction-airy/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -162,10 +162,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_word_classification args['metrics_fn'] = ner_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/ner-grit/train_preprocess.txt' - args['valid_set_path'] = './data/ner-grit/valid_preprocess.txt' - args['test_set_path'] = './data/ner-grit/test_preprocess.txt' - args['vocab_path'] = "./data/ner-grit/vocab_uncased.txt" + args['train_set_path'] = './data/nergrit_ner-grit/train_preprocess.txt' + args['valid_set_path'] = './data/nergrit_ner-grit/valid_preprocess.txt' + args['test_set_path'] = './data/nergrit_ner-grit/test_preprocess.txt' + args['vocab_path'] = "./data/nergrit_ner-grit/vocab_uncased.txt" args['embedding_path'] = { 
'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -182,10 +182,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_word_classification args['metrics_fn'] = pos_tag_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/pos-idn/train_preprocess.txt' - args['valid_set_path'] = './data/pos-idn/valid_preprocess.txt' - args['test_set_path'] = './data/pos-idn/test_preprocess.txt' - args['vocab_path'] = "./data/pos-idn/vocab_uncased.txt" + args['train_set_path'] = './data/bapos_pos-idn/train_preprocess.txt' + args['valid_set_path'] = './data/bapos_pos-idn/valid_preprocess.txt' + args['test_set_path'] = './data/bapos_pos-idn/test_preprocess.txt' + args['vocab_path'] = "./data/bapos_pos-idn/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -202,10 +202,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_sequence_classification args['metrics_fn'] = entailment_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/entailment-ui/train_preprocess.csv' - args['valid_set_path'] = './data/entailment-ui/valid_preprocess.csv' - args['test_set_path'] = './data/entailment-ui/test_preprocess.csv' - args['vocab_path'] = "./data/entailment-ui/vocab_uncased.txt" + args['train_set_path'] = './data/wrete_entailment-ui/train_preprocess.csv' + args['valid_set_path'] = './data/wrete_entailment-ui/valid_preprocess.csv' + args['test_set_path'] = './data/wrete_entailment-ui/test_preprocess.csv' + args['vocab_path'] = "./data/wrete_entailment-ui/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -222,10 +222,10 @@ def append_dataset_args(args): args['forward_fn'] = 
forward_sequence_classification args['metrics_fn'] = document_sentiment_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/doc-sentiment-prosa/train_preprocess.tsv' - args['valid_set_path'] = './data/doc-sentiment-prosa/valid_preprocess.tsv' - args['test_set_path'] = './data/doc-sentiment-prosa/test_preprocess.tsv' - args['vocab_path'] = "./data/doc-sentiment-prosa/vocab_uncased.txt" + args['train_set_path'] = './data/smsa_doc-sentiment-prosa/train_preprocess.tsv' + args['valid_set_path'] = './data/smsa_doc-sentiment-prosa/valid_preprocess.tsv' + args['test_set_path'] = './data/smsa_doc-sentiment-prosa/test_preprocess.tsv' + args['vocab_path'] = "./data/smsa_doc-sentiment-prosa/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -242,10 +242,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_word_classification args['metrics_fn'] = keyword_extraction_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/keyword-extraction-prosa/train_preprocess.txt' - args['valid_set_path'] = './data/keyword-extraction-prosa/valid_preprocess.txt' - args['test_set_path'] = './data/keyword-extraction-prosa/test_preprocess.txt' - args['vocab_path'] = "./data/keyword-extraction-prosa/vocab_uncased.txt" + args['train_set_path'] = './data/keps_keyword-extraction-prosa/train_preprocess.txt' + args['valid_set_path'] = './data/keps_keyword-extraction-prosa/valid_preprocess.txt' + args['test_set_path'] = './data/keps_keyword-extraction-prosa/test_preprocess.txt' + args['vocab_path'] = "./data/keps_keyword-extraction-prosa/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -262,10 +262,10 @@ def append_dataset_args(args): args['forward_fn'] = 
forward_word_classification args['metrics_fn'] = qa_factoid_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/qa-factoid-itb/train_preprocess.csv' - args['valid_set_path'] = './data/qa-factoid-itb/valid_preprocess.csv' - args['test_set_path'] = './data/qa-factoid-itb/test_preprocess.csv' - args['vocab_path'] = "./data/qa-factoid-itb/vocab_uncased.txt" + args['train_set_path'] = './data/facqa_qa-factoid-itb/train_preprocess.csv' + args['valid_set_path'] = './data/facqa_qa-factoid-itb/valid_preprocess.csv' + args['test_set_path'] = './data/facqa_qa-factoid-itb/test_preprocess.csv' + args['vocab_path'] = "./data/facqa_qa-factoid-itb/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -282,10 +282,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_word_classification args['metrics_fn'] = ner_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/ner-prosa/train_preprocess.txt' - args['valid_set_path'] = './data/ner-prosa/valid_preprocess.txt' - args['test_set_path'] = './data/ner-prosa/test_preprocess.txt' - args['vocab_path'] = "./data/ner-prosa/vocab_uncased.txt" + args['train_set_path'] = './data/nerp_ner-prosa/train_preprocess.txt' + args['valid_set_path'] = './data/nerp_ner-prosa/valid_preprocess.txt' + args['test_set_path'] = './data/nerp_ner-prosa/test_preprocess.txt' + args['vocab_path'] = "./data/nerp_ner-prosa/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -302,10 +302,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_word_classification args['metrics_fn'] = pos_tag_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/pos-prosa/train_preprocess.txt' - args['valid_set_path'] = 
'./data/pos-prosa/valid_preprocess.txt' - args['test_set_path'] = './data/pos-prosa/test_preprocess.txt' - args['vocab_path'] = "./data/pos-prosa/vocab_uncased.txt" + args['train_set_path'] = './data/posp_pos-prosa/train_preprocess.txt' + args['valid_set_path'] = './data/posp_pos-prosa/valid_preprocess.txt' + args['test_set_path'] = './data/posp_pos-prosa/test_preprocess.txt' + args['vocab_path'] = "./data/posp_pos-prosa/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -322,10 +322,10 @@ def append_dataset_args(args): args['forward_fn'] = forward_sequence_multi_classification args['metrics_fn'] = absa_metrics_fn args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/absa-prosa/train_preprocess.csv' - args['valid_set_path'] = './data/absa-prosa/valid_preprocess.csv' - args['test_set_path'] = './data/absa-prosa/test_preprocess.csv' - args['vocab_path'] = "./data/absa-prosa/vocab_uncased.txt" + args['train_set_path'] = './data/casa_absa-prosa/train_preprocess.csv' + args['valid_set_path'] = './data/casa_absa-prosa/valid_preprocess.csv' + args['test_set_path'] = './data/casa_absa-prosa/test_preprocess.csv' + args['vocab_path'] = "./data/casa_absa-prosa/vocab_uncased.txt" args['embedding_path'] = { 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', @@ -334,26 +334,6 @@ def append_dataset_args(args): } args['k_fold'] = 1 args['word_tokenizer_class'] = TweetTokenizer - elif args['dataset'] == "news-category-prosa": - args['task'] = 'sequence_classification' - args['num_labels'] = NewsCategorizationDataset.NUM_LABELS - args['dataset_class'] = NewsCategorizationDataset - args['dataloader_class'] = NewsCategorizationDataLoader - args['forward_fn'] = forward_sequence_multi_classification - args['metrics_fn'] = news_categorization_metrics_fn - 
args['valid_criterion'] = 'F1' - args['train_set_path'] = './data/news-category-prosa/train_preprocess.tsv' - args['valid_set_path'] = './data/news-category-prosa/valid_preprocess.tsv' - args['test_set_path'] = './data/news-category-prosa/test_preprocess.tsv' - args['vocab_path'] = "./data/news-category-prosa/vocab_uncased.txt" - args['embedding_path'] = { - 'word2vec-twitter': '../embeddings/word2vec-twitter/word2vec_400dim.txt', - 'fasttext-cc-id': '../embeddings/fasttext-cc-id/cc.id.300.vec', - 'fasttext-cc-id-300-no-oov-uncased': '../embeddings/fasttext-cc-id/cc.id.300_no-oov_news-category-prosa_uncased.txt', - 'fasttext-4B-id-300-no-oov-uncased': '../embeddings/fasttext-4B-id-uncased/fasttext.4B.id.300.epoch5_uncased_no-oov_news-category-prosa_uncased.txt' - } - args['k_fold'] = 1 - args['word_tokenizer_class'] = TweetTokenizer else: raise ValueError(f'Unknown dataset name `{args["dataset"]}`') - return args \ No newline at end of file + return args From a698339222a5c214e6a693e81f8c66785cf35477 Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Sun, 27 Sep 2020 01:11:36 +0800 Subject: [PATCH 2/6] Update README.md --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 01a8b55..a085f45 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,13 @@ IndoNLU is a collection of Natural Language Understanding (NLU) resources for Bahasa Indonesia. ## 12 Downstream Tasks -- Link [[Link]](https://github.com/indobenchmark/indonlu/tree/master/dataset) +- You can check [[Link]](https://github.com/indobenchmark/indonlu/tree/master/dataset) - We provide train, valid, and test set (with masked labels, no true labels). We are currently preparing a platform for auto-evaluation using Codalab. Please stay tuned! +## Examples +- A guide to load the IndoBERT model and finetune it on Sequence Classification and Sequence Tagging tasks. 
+- You can check [[Link]](https://github.com/indobenchmark/indonlu/tree/master/examples) + ## Indo4B - 23GB Indo4B Pretraining Dataset [[Link]](https://storage.googleapis.com/babert-pretraining/IndoNLU_finals/dataset/preprocessed/dataset_all_uncased_blankline.txt.xz) @@ -26,6 +30,7 @@ IndoNLU is a collection of Natural Language Understanding (NLU) resources for Ba ## Leaderboard (Under Construction) - Community Portal and Public Leaderboard [[Link]](https://www.indobenchmark.com/leaderboard.html) +- Submission Portal https://competitions.codalab.org/competitions/26537 (We will add more description on the submission format!) ## Paper IndoNLU has been accepted on AACL 2020 and you can find the detail on https://arxiv.org/abs/2009.05387 From c199ba24d602ff03962606fbe57fb871088f1cdf Mon Sep 17 00:00:00 2001 From: gentaiscool Date: Tue, 29 Sep 2020 21:26:06 +0800 Subject: [PATCH 3/6] add submission examples --- submission_examples/absa-airy_test.csv | 11 +++++++++++ submission_examples/absa-prosa_test.csv | 11 +++++++++++ submission_examples/doc-sentiment_test.csv | 11 +++++++++++ submission_examples/emotion-twitter_test.csv | 11 +++++++++++ submission_examples/entailment-ui_test.csv | 11 +++++++++++ submission_examples/keyword-extraction-prosa_test.csv | 11 +++++++++++ submission_examples/ner-grit_test.csv | 11 +++++++++++ submission_examples/ner-prosa_test.csv | 11 +++++++++++ submission_examples/pos-idn_test.csv | 11 +++++++++++ submission_examples/pos-prosa_test.csv | 11 +++++++++++ submission_examples/qa-factoid-itb_test.csv | 11 +++++++++++ submission_examples/term-extraction-airy_test.csv | 11 +++++++++++ 12 files changed, 132 insertions(+) create mode 100644 submission_examples/absa-airy_test.csv create mode 100644 submission_examples/absa-prosa_test.csv create mode 100644 submission_examples/doc-sentiment_test.csv create mode 100644 submission_examples/emotion-twitter_test.csv create mode 100644 submission_examples/entailment-ui_test.csv create mode 100644 
submission_examples/keyword-extraction-prosa_test.csv create mode 100644 submission_examples/ner-grit_test.csv create mode 100644 submission_examples/ner-prosa_test.csv create mode 100644 submission_examples/pos-idn_test.csv create mode 100644 submission_examples/pos-prosa_test.csv create mode 100644 submission_examples/qa-factoid-itb_test.csv create mode 100644 submission_examples/term-extraction-airy_test.csv diff --git a/submission_examples/absa-airy_test.csv b/submission_examples/absa-airy_test.csv new file mode 100644 index 0000000..e7d9271 --- /dev/null +++ b/submission_examples/absa-airy_test.csv @@ -0,0 +1,11 @@ +index,ac,air_panas,bau,general,kebersihan,linen,service,sunrise_meal,tv,wifi +0,neut,neut,neg,neut,neg,neut,pos,neut,neut,neut +1,neut,neut,neut,neut,pos,neg,pos,neut,neut,neut +2,neg,neut,neut,neut,neut,neut,pos,neut,neut,neut +3,neut,neut,neut,neut,neg,neg,neut,neut,neut,neut +4,neut,neut,neut,neut,neut,neut,pos,neut,neut,neg +5,neut,neut,neg,neut,neg,neut,neut,neut,neut,neut +6,neut,neut,neut,neut,neg,neg,neut,neut,neut,neut +7,neut,neut,neut,neut,neut,neut,neut,neut,neg,neg +8,neut,neut,neut,neut,neut,neg,neg,neut,neut,neut +9,neut,neut,neut,neut,neut,neg,neut,neut,neut,neut diff --git a/submission_examples/absa-prosa_test.csv b/submission_examples/absa-prosa_test.csv new file mode 100644 index 0000000..3b2773a --- /dev/null +++ b/submission_examples/absa-prosa_test.csv @@ -0,0 +1,11 @@ +index,fuel,machine,others,part,price,service +0,neutral,positive,neutral,neutral,neutral,neutral +1,neutral,negative,neutral,neutral,neutral,neutral +2,neutral,neutral,neutral,neutral,positive,neutral +3,neutral,neutral,neutral,positive,neutral,neutral +4,neutral,neutral,positive,neutral,neutral,neutral +5,neutral,negative,neutral,neutral,neutral,neutral +6,neutral,neutral,negative,neutral,neutral,neutral +7,positive,positive,neutral,positive,neutral,neutral +8,neutral,positive,neutral,neutral,neutral,neutral +9,neutral,neutral,negative,neutral,neutral,neutral 
diff --git a/submission_examples/doc-sentiment_test.csv b/submission_examples/doc-sentiment_test.csv new file mode 100644 index 0000000..876b6d4 --- /dev/null +++ b/submission_examples/doc-sentiment_test.csv @@ -0,0 +1,11 @@ +index,label +0,negative +1,positive +2,negative +3,positive +4,neutral +5,positive +6,positive +7,positive +8,negative +9,positive diff --git a/submission_examples/emotion-twitter_test.csv b/submission_examples/emotion-twitter_test.csv new file mode 100644 index 0000000..e8bc10a --- /dev/null +++ b/submission_examples/emotion-twitter_test.csv @@ -0,0 +1,11 @@ +index,label +0,anger +1,fear +2,happy +3,love +4,happy +5,anger +6,anger +7,sadness +8,sadness +9,happy diff --git a/submission_examples/entailment-ui_test.csv b/submission_examples/entailment-ui_test.csv new file mode 100644 index 0000000..b5f83a6 --- /dev/null +++ b/submission_examples/entailment-ui_test.csv @@ -0,0 +1,11 @@ +index,label +0,NotEntail +1,NotEntail +2,Entail_or_Paraphrase +3,Entail_or_Paraphrase +4,Entail_or_Paraphrase +5,Entail_or_Paraphrase +6,Entail_or_Paraphrase +7,NotEntail +8,Entail_or_Paraphrase +9,Entail_or_Paraphrase diff --git a/submission_examples/keyword-extraction-prosa_test.csv b/submission_examples/keyword-extraction-prosa_test.csv new file mode 100644 index 0000000..9292290 --- /dev/null +++ b/submission_examples/keyword-extraction-prosa_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['O', 'B', 'I', 'I', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +1,"['O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O']" +2,"['O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +3,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'B', 'I']" +4,"['O', 'O', 'B', 'O', 'B', 'O', 'B', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +5,"['B', 'I', 'I', 'O', 'B', 'I', 'B', 'I', 'I', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'O', 'B', 'B']" +6,"['O', 'O', 'O', 'O', 'B', 'I', 'O', 'B', 'O', 'B', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'B', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'O', 'O', 'B', 'I']" +7,"['O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'B', 'I', 'I', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'O', 'O', 'O', 'O']" +8,"['B', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I']" +9,"['B', 'I', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'O', 'O', 'O']" diff --git a/submission_examples/ner-grit_test.csv b/submission_examples/ner-grit_test.csv new file mode 100644 index 0000000..5cb19e2 --- /dev/null +++ b/submission_examples/ner-grit_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'O', 'O']" +1,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +2,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'O', 'O', 'B-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'O', 'O', 'O', 'O']" +3,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +4,"['O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +5,"['O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'B-PLACE', 'I-PLACE', 'O']" +6,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PLACE', 'I-PLACE', 'O', 'B-PLACE', 'I-PLACE', 'O']" +7,"['B-ORGANISATION', 'O', 'O', 'B-PLACE', 'O', 'B-PLACE', 'I-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +8,"['B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +9,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O']" diff --git a/submission_examples/ner-prosa_test.csv b/submission_examples/ner-prosa_test.csv new file mode 100644 index 0000000..cb3cac9 --- /dev/null +++ b/submission_examples/ner-prosa_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['O', 'O', 'B-PPL', 'O', 'O', 'B-PLC', 'O', 'O', 'O', 'O', 'O', 'B-PLC', 'O', 'O', 'O', 'O']" +1,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PLC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +2,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PPL', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +3,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PPL', 'O']" +4,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +5,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PPL', 'O', 'O', 'O', 'O', 'B-IND', 'O', 'O', 'O', 'O']" +6,"['O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-IND', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +7,"['B-PLC', 'O', 'B-PLC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +8,"['B-FNB', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +9,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" diff --git a/submission_examples/pos-idn_test.csv b/submission_examples/pos-idn_test.csv new file mode 100644 index 0000000..429b284 --- /dev/null +++ b/submission_examples/pos-idn_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['B-IN', 'B-CD', 'B-Z', 'B-NN', 'B-NN', 'B-IN', 'B-NN', 'B-IN', 'B-NN', 'B-VB', 'B-NND', 'B-NN', 'I-NN', 'B-NN', 'B-NNP', 'B-IN', 'B-NN', 'B-SC', 'B-JJ', 'B-Z', 'B-VB', 'B-CD', 'B-NN', 'B-SC', 'B-VB', 'B-IN', 'B-NN', 'B-PRP', 'I-PRP', 'B-Z']" +1,"['B-NN', 'B-NNP', 'B-MD', 'B-VB', 'B-NN', 'B-IN', 'B-NNP', 'B-NNP', 'B-NNP', 'B-NNP', 'B-IN', 'B-NN', 'B-VB', 'B-NN', 'B-Z', 'B-PR', 'B-VB', 'B-NN', 'B-NNP', 'B-CD', 'B-NNP', 'B-Z', 'B-NNP', 'B-Z']" +2,"['B-NNP', 'B-MD', 'B-VB', 'B-IN', 'B-CD', 'B-NN', 'B-NN', 'B-SC', 'B-MD', 'B-VB', 'B-SC', 'B-PRP', 'B-VB', 'B-IN', 'B-NNP', 'B-Z', 'B-VB', 'B-NN', 'B-PR', 'B-Z']" +3,"['B-NN', 'B-PR', 'B-MD', 'B-VB', 'B-JJ', 'B-PRP', 'I-PRP', 'B-IN', 'B-WH', 'B-PRP', 'B-VB', 'B-NN', 'B-PRP', 'I-PRP', 'B-SC', 'B-NN', 'B-IN', 'B-NN', 'B-VB', 'B-NN', 'B-NNP', 'B-NNP', 'B-SC', 'B-NNP', 'B-VB', 'B-NN', 'B-NN', 'B-NNP', 'B-Z']" +4,"['B-NNP', 'B-CD', 'B-NND', 'B-VB', 'B-VB', 'B-IN', 'B-NN', 'B-NN', 'B-CC', 'B-NEG', 'B-MD', 'B-VB', 'B-NN', 'B-Z']" +5,"['B-PRP', 'B-RB', 'B-VB', 'B-NN', 'B-PR', 'B-Z']" +6,"['B-NN', 'B-NN', 'B-NNP', 'B-NNP', 'B-NNP', 'B-NNP', 'B-VB', 'B-IN', 'B-NN', 'B-NN', 'B-IN', 'B-NNP', 'B-Z', 'B-NNP', 'B-Z', 'B-VB', 'B-NN', 'B-JJ', 'B-NNP', 'B-NNP', 'B-NNP', 'B-NNP', 'B-SC', 'B-VB', 'B-VB', 'B-NNP', 'B-NNP', 'B-IN', 'B-NN', 'B-NN', 'B-CC', 'B-VB', 'B-IN', 'B-NNP', 'B-Z']" +7,"['B-NNP', 'B-Z', 'B-SC', 
'B-OD', 'B-NND', 'B-VB', 'B-IN', 'B-NNP', 'B-NNP', 'B-CC', 'B-CC', 'B-VB', 'B-NN', 'B-NNP', 'B-IN', 'B-CD', 'B-NN', 'B-SC', 'B-VB', 'B-NN', 'B-NNP', 'B-NN', 'B-NNP', 'B-IN', 'B-NN', 'I-NN', 'B-JJ', 'B-Z', 'B-VB', 'B-NN', 'B-NN', 'B-RB', 'B-SC', 'B-VB', 'B-IN', 'B-NN', 'I-NN', 'B-IN', 'B-NNP', 'B-Z']" +8,"['B-NN', 'B-VB', 'B-CD', 'B-NN', 'B-PR', 'B-VB', 'B-Z', 'B-PRP', 'B-MD', 'B-NND', 'B-NN', 'B-CC', 'B-MD', 'B-VB', 'B-CD', 'B-NN', 'I-NN', 'B-JJ', 'B-IN', 'B-CD', 'I-CD', 'I-CD', 'B-SC', 'B-VB', 'B-Z', 'B-PRP', 'B-VB', 'B-IN', 'B-NN', 'B-Z']" +9,"['B-NNP', 'B-VB', 'B-VB', 'B-NN', 'B-JJ', 'B-CC', 'B-NN', 'I-NN', 'B-NND', 'B-PR', 'B-Z', 'B-RB', 'B-SC', 'B-PRP', 'B-VB', 'B-VB', 'B-NN', 'B-JJ', 'B-Z']" diff --git a/submission_examples/pos-prosa_test.csv b/submission_examples/pos-prosa_test.csv new file mode 100644 index 0000000..2726aa5 --- /dev/null +++ b/submission_examples/pos-prosa_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['B-NNO', 'B-NNO', 'B-NNP', 'B-VBI', 'B-PPO', 'B-NNP', 'B-SYM', 'B-CCN', 'B-VBI', 'B-PPO', 'B-PPO', 'B-NNP', 'B-PPO', 'B-NNO', 'B-NUM', 'B-SYM']" +1,"['B-NNO', 'B-VBP', 'B-PPO', 'B-NNO', 'B-SYM', 'B-NNO', 'B-NNO', 'B-CCN', 'B-NNO', 'B-SYM', 'B-NNO', 'B-PRR', 'B-VBP', 'B-VBP', 'B-PPO', 'B-PPO', 'B-NNO', 'B-NNP', 'B-VBI', 'B-PPO', 'B-NUM', 'B-CSN', 'B-NUM', 'B-NNO', 'B-PPO', 'B-NNO', 'B-NNO', 'B-VBT', 'B-NNO', 'B-SYM']" +2,"['B-SYM', 'B-NNO', 'B-PPO', 'B-NNO', 'B-VBI', 'B-PPO', 'B-NNO', 'B-NNO', 'B-NNP', 'B-NNP', 'B-CCN', 'B-NNP', 'B-SYM', 'B-NNO', 'B-PPO', 'B-NNO', 'B-VBT', 'B-NNO', 'B-PPO', 'B-VBT', 'B-NNP', 'B-SYM', 'B-VBT', 'B-NNP', 'B-SYM', 'B-NNO', 'B-VBP', 'B-NNO', 'B-SYM', 'B-NNP', 'B-SYM', 'B-NUM', 'B-SYM']" +3,"['B-SYM', 'B-PRN', 'B-NNO', 'B-ART', 'B-NNO', 'B-NNO', 'B-SYM', 'B-PPO', 'B-NNO', 'B-NNO', 'B-ART', 'B-PAR', 'B-NNO', 'B-ADJ', 'B-VBI', 'B-PRI', 'B-ADV', 'B-SYM', 'B-CCN', 'B-PRN', 'B-PPO', 'B-NNO', 'B-ADJ', 'B-SYM', 'B-NNO', 'B-PPO', 'B-NNO', 'B-SYM', 'B-NNP', 'B-NNP', 'B-SYM', 'B-VBT', 'B-NNP', 'B-SYM']" +4,"['B-NNO', 'B-ADJ', 
'B-NNO', 'B-NNO', 'B-ADV', 'B-NNO', 'B-CCN', 'B-ADJ', 'B-CSN', 'B-NNO', 'B-PRK', 'B-ADK', 'B-VBT', 'B-NNO', 'B-ADJ', 'B-CCN', 'B-ADJ', 'B-VBP', 'B-CCN', 'B-VBT', 'B-NNP', 'B-NNO', 'B-PRR', 'B-NEG', 'B-ADV', 'B-SYM']" +5,"['B-PPO', 'B-VBT', 'B-ADV', 'B-ADJ', 'B-SYM', 'B-PRN', 'B-VBT', 'B-NNO', 'B-PRR', 'B-ADV', 'B-ADJ', 'B-CCN', 'B-NEG', 'B-ADJ', 'B-SYM', 'B-VBT', 'B-NNP', 'B-PPO', 'B-NNO', 'B-PRR', 'B-VBP', 'B-NNP', 'B-SYM', 'B-NUM', 'B-SYM', 'B-SYM']" +6,"['B-VBI', 'B-ART', 'B-NNP', 'B-PRR', 'B-ADV', 'B-VBT', 'B-NNO', 'B-ADJ', 'B-PPO', 'B-NNP', 'B-CSN', 'B-SYM', 'B-PPO', 'B-NNO', 'B-NNP', 'B-SYM', 'B-NNO', 'B-ART', 'B-VBP', 'B-ADJ', 'B-NNO', 'B-VBI', 'B-SYM']" +7,"['B-NNP', 'B-SYM', 'B-NNP', 'B-ADK', 'B-VBL', 'B-NNO', 'B-KUA', 'B-PRR', 'B-PRR', 'B-ADK', 'B-VBP', 'B-SYM']" +8,"['B-NNP', 'B-ART', 'B-VBI', 'B-ADJ', 'B-ADJ', 'B-PPO', 'B-NNO', 'B-PRR', 'B-ADJ', 'B-NNO', 'B-SYM']" +9,"['B-SYM', 'B-NNP', 'B-NNP', 'B-NNP', 'B-SYM', 'B-NNP', 'B-SYM', 'B-VBT', 'B-NNO', 'B-ADJ', 'B-PPO', 'B-NNO', 'B-SYM', 'B-NNP', 'B-SYM', 'B-NNO', 'B-NNO', 'B-NNO', 'B-NNO', 'B-NNO', 'B-SYM', 'B-NNP', 'B-SYM', 'B-NNO', 'B-NUM', 'B-SYM', 'B-NNP', 'B-SYM', 'B-NUM', 'B-SYM', 'B-SYM']" diff --git a/submission_examples/qa-factoid-itb_test.csv b/submission_examples/qa-factoid-itb_test.csv new file mode 100644 index 0000000..d33074f --- /dev/null +++ b/submission_examples/qa-factoid-itb_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'O', 'O', 'O', 'O']" +1,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'O', 'O']" +2,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +3,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'O', 'O']" +4,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +5,"['O', 'O', 'O', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +6,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +7,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +8,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +9,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" diff --git a/submission_examples/term-extraction-airy_test.csv b/submission_examples/term-extraction-airy_test.csv new file mode 100644 index 0000000..dba2685 --- /dev/null +++ b/submission_examples/term-extraction-airy_test.csv @@ -0,0 +1,11 @@ +index,label +0,"['B-ASPECT', 'B-SENTIMENT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'B-ASPECT', 'I-ASPECT', 'B-SENTIMENT', 'I-SENTIMENT', 'O']" +1,"['O', 'O', 'O', 'O', 'O', 'O', 'O']" +2,"['O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'O', 'B-ASPECT', 'O', 'B-SENTIMENT', 'O']" +3,"['B-ASPECT', 'B-SENTIMENT', 'I-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'B-ASPECT', 'O']" +4,"['B-ASPECT', 'B-SENTIMENT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'B-ASPECT', 'I-ASPECT', 'B-SENTIMENT', 'I-SENTIMENT', 'O']" +5,"['B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'I-SENTIMENT', 'O', 'B-ASPECT', 'I-ASPECT', 'B-SENTIMENT', 'O', 'B-ASPECT', 'O', 'O', 'O', 'O', 'O', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +6,"['B-ASPECT', 'I-ASPECT', 'O', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +7,"['B-ASPECT', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +8,"['B-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" +9,"['B-ASPECT', 'B-SENTIMENT', 'O', 'B-SENTIMENT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'I-SENTIMENT', 'I-SENTIMENT', 
'I-SENTIMENT', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'B-ASPECT', 'I-ASPECT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'B-SENTIMENT', 'I-SENTIMENT', 'B-ASPECT', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ASPECT', 'B-SENTIMENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']" From 34c856a5b9eb917e3960aa6b39b1dc31b994e8ca Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Tue, 29 Sep 2020 21:32:52 +0800 Subject: [PATCH 4/6] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a085f45..780ccaa 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,14 @@ IndoNLU is a collection of Natural Language Understanding (NLU) resources for Ba - Phase 1 [[Link]](https://huggingface.co/indobenchmark/indobert-lite-large-p1) - Phase 2 [[Link]](https://huggingface.co/indobenchmark/indobert-lite-large-p2) -## Leaderboard (Under Construction) +## Leaderboard - Community Portal and Public Leaderboard [[Link]](https://www.indobenchmark.com/leaderboard.html) -- Submission Portal https://competitions.codalab.org/competitions/26537 (We will add more description on the submission format!) +- Submission Portal https://competitions.codalab.org/competitions/26537 + +### Submission Format +Please kindly check [[Link]](https://github.com/indobenchmark/indonlu/tree/master/submission_examples). For each task, there is different format. Every submission file always start with the `index` column (the id of the test sample following the order of the masked test set). + +First you need to rename your prediction into 'pred.txt', then zip the file. 
## Paper IndoNLU has been accepted on AACL 2020 and you can find the detail on https://arxiv.org/abs/2009.05387 From 20025e7e062c5601f9075ea800c1d8751651f1c9 Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Tue, 29 Sep 2020 21:41:08 +0800 Subject: [PATCH 5/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 780ccaa..dcc9785 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ IndoNLU is a collection of Natural Language Understanding (NLU) resources for Ba ### Submission Format Please kindly check [[Link]](https://github.com/indobenchmark/indonlu/tree/master/submission_examples). For each task, there is different format. Every submission file always start with the `index` column (the id of the test sample following the order of the masked test set). -First you need to rename your prediction into 'pred.txt', then zip the file. +For the submission, first you need to rename your prediction into 'pred.txt', then zip the file. 
## Paper IndoNLU has been accepted on AACL 2020 and you can find the detail on https://arxiv.org/abs/2009.05387 From 1dcb1ae567f207e729801d2a251c52e67628acd1 Mon Sep 17 00:00:00 2001 From: samuel cahyawijaya Date: Sat, 3 Oct 2020 18:22:26 +0800 Subject: [PATCH 6/6] remove examples/prediction.csv --- examples/prediction.csv | 501 ---------------------------------------- 1 file changed, 501 deletions(-) delete mode 100644 examples/prediction.csv diff --git a/examples/prediction.csv b/examples/prediction.csv deleted file mode 100644 index a30de85..0000000 --- a/examples/prediction.csv +++ /dev/null @@ -1,501 +0,0 @@ -index,label -0,negative -1,negative -2,negative -3,negative -4,negative -5,negative -6,negative -7,negative -8,negative -9,negative -10,negative -11,negative -12,negative -13,negative -14,negative -15,negative -16,negative -17,negative -18,negative -19,negative -20,negative -21,negative -22,negative -23,negative -24,negative -25,negative -26,negative -27,negative -28,negative -29,negative -30,negative -31,negative -32,negative -33,negative -34,negative -35,negative -36,negative -37,negative -38,negative -39,negative -40,negative -41,negative -42,negative -43,negative -44,negative -45,negative -46,negative -47,negative -48,negative -49,negative -50,negative -51,negative -52,negative -53,negative -54,negative -55,negative -56,negative -57,negative -58,negative -59,negative -60,negative -61,negative -62,negative -63,negative -64,negative -65,negative -66,negative -67,negative -68,negative -69,negative -70,negative -71,negative -72,negative -73,negative -74,negative -75,negative -76,negative -77,negative -78,negative -79,negative -80,negative -81,negative -82,negative -83,negative -84,negative -85,negative -86,negative -87,negative -88,negative -89,negative -90,negative -91,negative -92,negative -93,negative -94,negative -95,negative -96,negative -97,negative -98,negative -99,negative -100,negative -101,negative -102,negative -103,negative -104,negative 
-105,negative -106,negative -107,negative -108,negative -109,negative -110,negative -111,negative -112,negative -113,negative -114,negative -115,negative -116,negative -117,negative -118,negative -119,negative -120,negative -121,negative -122,negative -123,negative -124,negative -125,positive -126,positive -127,positive -128,positive -129,positive -130,positive -131,positive -132,positive -133,positive -134,positive -135,positive -136,positive -137,positive -138,positive -139,positive -140,positive -141,positive -142,positive -143,positive -144,positive -145,positive -146,positive -147,positive -148,positive -149,positive -150,negative -151,positive -152,positive -153,positive -154,positive -155,positive -156,negative -157,positive -158,positive -159,positive -160,positive -161,positive -162,positive -163,positive -164,positive -165,positive -166,positive -167,positive -168,positive -169,positive -170,positive -171,positive -172,positive -173,positive -174,positive -175,negative -176,positive -177,positive -178,positive -179,neutral -180,positive -181,positive -182,positive -183,positive -184,positive -185,positive -186,positive -187,positive -188,positive -189,positive -190,positive -191,positive -192,positive -193,positive -194,positive -195,negative -196,positive -197,positive -198,positive -199,positive -200,positive -201,positive -202,positive -203,positive -204,positive -205,positive -206,positive -207,neutral -208,positive -209,positive -210,positive -211,positive -212,positive -213,positive -214,positive -215,positive -216,positive -217,positive -218,neutral -219,positive -220,positive -221,positive -222,positive -223,positive -224,positive -225,positive -226,positive -227,positive -228,positive -229,positive -230,positive -231,positive -232,positive -233,positive -234,positive -235,positive -236,positive -237,positive -238,positive -239,positive -240,positive -241,positive -242,positive -243,positive -244,positive -245,positive -246,positive -247,positive 
-248,positive -249,positive -250,neutral -251,neutral -252,neutral -253,neutral -254,negative -255,negative -256,negative -257,positive -258,neutral -259,negative -260,negative -261,negative -262,negative -263,neutral -264,neutral -265,neutral -266,neutral -267,neutral -268,neutral -269,neutral -270,neutral -271,positive -272,positive -273,neutral -274,neutral -275,neutral -276,neutral -277,neutral -278,neutral -279,neutral -280,neutral -281,neutral -282,neutral -283,neutral -284,neutral -285,neutral -286,neutral -287,neutral -288,positive -289,neutral -290,neutral -291,neutral -292,neutral -293,neutral -294,positive -295,neutral -296,neutral -297,neutral -298,neutral -299,neutral -300,negative -301,positive -302,negative -303,negative -304,negative -305,negative -306,negative -307,negative -308,negative -309,negative -310,negative -311,negative -312,negative -313,negative -314,negative -315,negative -316,negative -317,negative -318,negative -319,negative -320,negative -321,negative -322,negative -323,negative -324,negative -325,negative -326,negative -327,negative -328,negative -329,negative -330,negative -331,negative -332,negative -333,negative -334,negative -335,negative -336,negative -337,negative -338,negative -339,negative -340,negative -341,negative -342,negative -343,negative -344,negative -345,negative -346,negative -347,negative -348,negative -349,negative -350,negative -351,negative -352,negative -353,negative -354,negative -355,negative -356,negative -357,negative -358,negative -359,negative -360,negative -361,negative -362,negative -363,negative -364,negative -365,negative -366,negative -367,negative -368,negative -369,negative -370,negative -371,negative -372,negative -373,negative -374,negative -375,positive -376,positive -377,positive -378,positive -379,positive -380,positive -381,positive -382,positive -383,positive -384,positive -385,positive -386,positive -387,positive -388,positive -389,positive -390,positive -391,positive -392,neutral 
-393,positive -394,positive -395,positive -396,positive -397,positive -398,positive -399,positive -400,positive -401,negative -402,positive -403,positive -404,positive -405,positive -406,positive -407,positive -408,positive -409,positive -410,positive -411,positive -412,positive -413,positive -414,positive -415,positive -416,positive -417,positive -418,positive -419,positive -420,positive -421,positive -422,positive -423,positive -424,positive -425,positive -426,positive -427,positive -428,negative -429,positive -430,negative -431,positive -432,positive -433,positive -434,positive -435,positive -436,positive -437,positive -438,positive -439,positive -440,positive -441,positive -442,positive -443,negative -444,positive -445,negative -446,positive -447,positive -448,neutral -449,positive -450,neutral -451,negative -452,neutral -453,neutral -454,neutral -455,neutral -456,positive -457,neutral -458,neutral -459,neutral -460,negative -461,neutral -462,neutral -463,negative -464,neutral -465,neutral -466,neutral -467,positive -468,neutral -469,negative -470,negative -471,neutral -472,negative -473,neutral -474,neutral -475,negative -476,neutral -477,positive -478,neutral -479,negative -480,negative -481,positive -482,positive -483,negative -484,neutral -485,positive -486,negative -487,neutral -488,positive -489,positive -490,neutral -491,neutral -492,neutral -493,neutral -494,neutral -495,neutral -496,neutral -497,positive -498,positive -499,positive