diff --git a/ocr_tagging/ckpt_ocr/model/train.log b/ocr_tagging/ckpt_ocr/model/train.log index 7d66617..060707b 100644 --- a/ocr_tagging/ckpt_ocr/model/train.log +++ b/ocr_tagging/ckpt_ocr/model/train.log @@ -144,3 +144,241 @@ I- 2024-05-11 22:00:59;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/config.json HTTP/1.1" 200 0 2024-05-11 22:01:00;[INFO];>>>>>>> Training Start! 2024-05-11 22:01:00;[INFO];[Now Epoch: 0] +2024-05-24 20:11:24;[INFO];[ ɾ]: sh ./scripts_ocr/model/do_train.sh +2024-05-24 20:11:24;[INFO];[Config] +2024-05-24 20:11:24;[INFO];{'epochs': 10, 'train_batch_size': 4, 'valid_batch_size': 4, 'init_model_path': 'klue/bert-base', 'max_length': 512, 'need_birnn': 0, 'aspect_drop_ratio': 0.3, 'aspect_in_feature': 768, 'stop_patience': 3, 'train_fp': './resources_ocr/parsing_data/train/', 'valid_fp': './resources_ocr/parsing_data/valid/', 'base_path': './ckpt_ocr/model/', 'label_info_file': 'meta.bin', 'out_model_path': 'pytorch_model.bin', 'cmd': 'sh ./scripts_ocr/model/do_train.sh'} +2024-05-24 20:11:24;[INFO];>>>>>>> Device Setting +2024-05-24 20:11:24;[INFO];Now using CPU +2024-05-24 20:11:24;[INFO];>>>>>>> Now setting Aspect Category Encoder +2024-05-24 20:11:24;[INFO];>>>>>>> Now setting train/valid DataLoaders +2024-05-24 20:11:24;[DEBUG];Starting new HTTPS connection (1): huggingface.co:443 +2024-05-24 20:11:25;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-24 20:11:25;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-24 20:11:25;[INFO];>>>>>>> Now setting Model Architecture +2024-05-24 20:11:25;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/config.json HTTP/1.1" 200 0 +2024-05-24 20:11:27;[INFO];>>>>>>> Training Start! +2024-05-24 20:11:27;[INFO];[Now Epoch: 0] +2024-05-24 20:33:24;[INFO];*****eval metrics***** +2024-05-24 20:33:24;[INFO];eval_loss: 68.64664435386658 +2024-05-24 20:33:24;[INFO];eval_runtime: 0:00:56.789281 +2024-05-24 20:33:24;[INFO];eval_samples: 8 +2024-05-24 20:33:24;[INFO];eval_samples_per_second: 0:00:07.098660 +2024-05-24 20:33:24;[INFO];Aspect Accuracy: 0.57 +2024-05-24 20:33:24;[INFO];Aspect f1score micro : 0.57 +2024-05-24 20:33:24;[INFO];Aspect Accuracy Report: +2024-05-24 20:33:24;[INFO]; precision recall f1-score support + + B- 0.0000 0.0000 0.0000 9 + B- 0.0000 0.0000 0.0000 1 + B- 0.0000 0.0000 0.0000 1 + B-ƼƮ 0.0000 0.0000 0.0000 4 + B- 0.0000 0.0000 0.0000 2 + B-ܷǥ 0.0000 0.0000 0.0000 2 + B- 0.0000 0.0000 0.0000 2 + B-̽ 0.0000 0.0000 0.0000 2 + B-޴뼺 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 19 + I- 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 2 + I-ƼƮ 0.0000 0.0000 0.0000 11 + I- 0.0000 0.0000 0.0000 9 + I-ܷǥ 0.0000 0.0000 0.0000 4 + I- 0.0000 0.0000 0.0000 32 + I-̽ 0.0000 0.0000 0.0000 2 + I-޴뼺 0.0000 0.0000 0.0000 4 + O 0.7556 1.0000 0.8608 340 + + accuracy 0.7556 450 + macro avg 0.0398 0.0526 0.0453 450 +weighted avg 0.5709 0.7556 0.6504 450 + +2024-05-24 20:33:24;[INFO];Train Loss = 97.60242123901844 Valid Loss = 68.64664435386658 +2024-05-24 20:33:26;[INFO];[Now Epoch: 1] +2024-05-24 20:54:45;[INFO];*****eval metrics***** +2024-05-24 20:54:45;[INFO];eval_loss: 61.73322319984436 +2024-05-24 20:54:45;[INFO];eval_runtime: 0:00:53.207835 +2024-05-24 20:54:45;[INFO];eval_samples: 8 +2024-05-24 20:54:45;[INFO];eval_samples_per_second: 0:00:06.650979 +2024-05-24 20:54:45;[INFO];Aspect Accuracy: 0.56 +2024-05-24 20:54:45;[INFO];Aspect f1score micro : 0.56 +2024-05-24 20:54:45;[INFO];Aspect Accuracy Report: +2024-05-24 20:54:45;[INFO]; precision recall f1-score support + + B- 0.0000 0.0000 0.0000 9 + B- 0.0000 0.0000 0.0000 1 + B- 0.0000 0.0000 0.0000 1 + B-ƼƮ 0.0000 0.0000 0.0000 4 + B- 0.0000 0.0000 0.0000 2 + B-ܷǥ 0.0000 0.0000 0.0000 2 + B- 0.0000 0.0000 0.0000 2 + B-̽ 0.0000 0.0000 0.0000 2 + B-޴뼺 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 19 + I- 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 2 + I-ƼƮ 0.0000 0.0000 0.0000 11 + I-͸ӵ 0.0000 0.0000 0.0000 0 + I- 0.0000 0.0000 0.0000 9 + I-ܷǥ 0.0000 0.0000 0.0000 4 + I- 0.0000 0.0000 0.0000 32 + I-̽ 0.0000 0.0000 0.0000 2 + I-޴뼺 0.0000 0.0000 0.0000 4 + O 0.7539 0.9912 0.8564 340 + + accuracy 0.7489 450 + macro avg 0.0377 0.0496 0.0428 450 +weighted avg 0.5696 0.7489 0.6471 450 + +2024-05-24 20:54:45;[INFO];Train Loss = 73.78356349468231 Valid Loss = 61.73322319984436 +2024-05-24 20:54:46;[INFO];[Now Epoch: 2] +2024-05-24 21:13:25;[INFO];*****eval metrics***** +2024-05-24 21:13:25;[INFO];eval_loss: 59.49133384227753 +2024-05-24 21:13:25;[INFO];eval_runtime: 0:01:01.399662 +2024-05-24 21:13:25;[INFO];eval_samples: 8 +2024-05-24 21:13:25;[INFO];eval_samples_per_second: 0:00:07.674958 +2024-05-24 21:13:25;[INFO];Aspect Accuracy: 0.56 +2024-05-24 21:13:25;[INFO];Aspect f1score micro : 0.56 +2024-05-24 21:13:25;[INFO];Aspect Accuracy Report: +2024-05-24 21:13:25;[INFO]; precision recall f1-score support + + B- 0.0000 0.0000 0.0000 9 + B- 0.0000 0.0000 0.0000 1 + B- 0.0000 0.0000 0.0000 1 + B-ƼƮ 0.0000 0.0000 0.0000 4 + B- 0.0000 0.0000 0.0000 2 + B-ܷǥ 0.0000 0.0000 0.0000 2 + B- 0.0000 0.0000 0.0000 2 + B-̽ 0.0000 0.0000 0.0000 2 + B-޴뼺 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 19 + I- 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 2 + I-ƼƮ 0.0000 0.0000 0.0000 11 + I-͸ӵ 0.0000 0.0000 0.0000 0 + I- 0.0000 0.0000 0.0000 9 + I-ܷǥ 0.0000 0.0000 0.0000 4 + I- 0.0000 0.0000 0.0000 32 + I-̽ 0.0000 0.0000 0.0000 2 + I-޴뼺 0.0000 0.0000 0.0000 4 + O 0.7607 0.9912 0.8608 340 + + accuracy 0.7489 450 + macro avg 0.0380 0.0496 0.0430 450 +weighted avg 0.5748 0.7489 0.6504 450 + +2024-05-24 21:13:25;[INFO];Train Loss = 63.223463617265224 Valid Loss = 59.49133384227753 +2024-05-24 21:13:26;[INFO];[Now Epoch: 3] +2024-05-27 16:26:06;[INFO];[ ɾ]: sh ./scripts_ocr/model/do_train.sh +2024-05-27 16:26:06;[INFO];[Config] +2024-05-27 16:26:06;[INFO];{'epochs': 10, 'train_batch_size': 4, 'valid_batch_size': 4, 'init_model_path': 'klue/bert-base', 'max_length': 512, 'need_birnn': 0, 'aspect_drop_ratio': 0.3, 'aspect_in_feature': 768, 'stop_patience': 3, 'train_fp': './resources_ocr/parsing_data/train/', 'valid_fp': './resources_ocr/parsing_data/valid/', 'base_path': './ckpt_ocr/model/', 'label_info_file': 'meta.bin', 'out_model_path': 'pytorch_model.bin', 'cmd': 'sh ./scripts_ocr/model/do_train.sh'} +2024-05-27 16:26:06;[INFO];>>>>>>> Device Setting +2024-05-27 16:26:06;[INFO];Now using CPU +2024-05-27 16:26:06;[INFO];>>>>>>> Now setting Aspect Category Encoder +2024-05-27 16:26:06;[INFO];>>>>>>> Now setting train/valid DataLoaders +2024-05-27 16:26:06;[DEBUG];Starting new HTTPS connection (1): huggingface.co:443 +2024-05-27 16:26:06;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-27 16:26:07;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-27 16:26:07;[INFO];>>>>>>> Now setting Model Architecture +2024-05-27 16:26:07;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/config.json HTTP/1.1" 200 0 +2024-05-27 16:26:09;[INFO];>>>>>>> Training Start! +2024-05-27 16:26:09;[INFO];[Now Epoch: 0] +2024-05-27 19:33:48;[INFO];[ ɾ]: sh ./scripts_ocr/model/do_train.sh +2024-05-27 19:33:48;[INFO];[Config] +2024-05-27 19:33:48;[INFO];{'epochs': 10, 'train_batch_size': 4, 'valid_batch_size': 4, 'init_model_path': 'klue/bert-base', 'max_length': 512, 'need_birnn': 0, 'aspect_drop_ratio': 0.3, 'aspect_in_feature': 768, 'stop_patience': 3, 'train_fp': './resources_ocr/parsing_data/train/', 'valid_fp': './resources_ocr/parsing_data/valid/', 'base_path': './ckpt_ocr/model/', 'label_info_file': 'meta.bin', 'out_model_path': 'pytorch_model.bin', 'cmd': 'sh ./scripts_ocr/model/do_train.sh'} +2024-05-27 19:33:48;[INFO];>>>>>>> Device Setting +2024-05-27 19:33:48;[INFO];Now using CPU +2024-05-27 19:33:48;[INFO];>>>>>>> Now setting Aspect Category Encoder +2024-05-27 19:33:48;[INFO];>>>>>>> Now setting train/valid DataLoaders +2024-05-27 19:33:48;[DEBUG];Starting new HTTPS connection (1): huggingface.co:443 +2024-05-27 19:33:48;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-27 19:33:49;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-27 19:33:49;[INFO];>>>>>>> Now setting Model Architecture +2024-05-27 19:33:49;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/config.json HTTP/1.1" 200 0 +2024-05-27 19:33:50;[INFO];>>>>>>> Training Start! +2024-05-27 19:33:50;[INFO];[Now Epoch: 0] +2024-05-27 19:47:32;[INFO];*****eval metrics***** +2024-05-27 19:47:32;[INFO];eval_loss: 49.402870178222656 +2024-05-27 19:47:32;[INFO];eval_runtime: 0:00:45.805668 +2024-05-27 19:47:32;[INFO];eval_samples: 8 +2024-05-27 19:47:32;[INFO];eval_samples_per_second: 0:00:05.725708 +2024-05-27 19:47:32;[INFO];Aspect Accuracy: 0.55 +2024-05-27 19:47:32;[INFO];Aspect f1score micro : 0.55 +2024-05-27 19:47:32;[INFO];Aspect Accuracy Report: +2024-05-27 19:47:32;[INFO]; precision recall f1-score support + + B- 0.0000 0.0000 0.0000 11 + B- 0.0000 0.0000 0.0000 1 + B- 0.0000 0.0000 0.0000 1 + B-ƼƮ 0.0000 0.0000 0.0000 4 + B- 0.0000 0.0000 0.0000 2 + B-ܷǥ 0.0000 0.0000 0.0000 2 + B- 0.0000 0.0000 0.0000 2 + B-̽ 0.0000 0.0000 0.0000 2 + B-޴뼺 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 8 + I- 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 2 + I-ƼƮ 0.0000 0.0000 0.0000 7 + I- 0.0000 0.0000 0.0000 3 + I-ܷǥ 0.0000 0.0000 0.0000 1 + I- 0.0000 0.0000 0.0000 19 + I-̽ 0.0000 0.0000 0.0000 2 + I-޴뼺 0.0000 0.0000 0.0000 2 + O 0.6756 1.0000 0.8064 152 + + accuracy 0.6756 225 + macro avg 0.0356 0.0526 0.0424 225 +weighted avg 0.4564 0.6756 0.5447 225 + +2024-05-27 19:47:32;[INFO];Train Loss = 73.6145791709423 Valid Loss = 49.402870178222656 +2024-05-27 19:47:33;[INFO];[Now Epoch: 1] +2024-05-27 20:15:43;[INFO];[ ɾ]: sh ./scripts_ocr/model/do_train.sh +2024-05-27 20:15:43;[INFO];[Config] +2024-05-27 20:15:43;[INFO];{'epochs': 10, 'train_batch_size': 4, 'valid_batch_size': 4, 'init_model_path': 'klue/bert-base', 'max_length': 512, 'need_birnn': 0, 'aspect_drop_ratio': 0.3, 'aspect_in_feature': 768, 'stop_patience': 3, 'train_fp': './resources_ocr/parsing_data/train/', 'valid_fp': './resources_ocr/parsing_data/valid/', 'base_path': './ckpt_ocr/model/', 'label_info_file': 'meta.bin', 'out_model_path': 'pytorch_model.bin', 'cmd': 'sh ./scripts_ocr/model/do_train.sh'} +2024-05-27 20:15:43;[INFO];>>>>>>> Device Setting +2024-05-27 20:15:43;[INFO];Now using CPU +2024-05-27 20:15:43;[INFO];>>>>>>> Now setting Aspect Category Encoder +2024-05-27 20:15:43;[INFO];>>>>>>> Now setting train/valid DataLoaders +2024-05-27 20:15:43;[DEBUG];Starting new HTTPS connection (1): huggingface.co:443 +2024-05-27 20:15:43;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-27 20:15:44;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/vocab.txt HTTP/1.1" 200 0 +2024-05-27 20:15:44;[INFO];>>>>>>> Now setting Model Architecture +2024-05-27 20:15:44;[DEBUG];https://huggingface.co:443 "HEAD /klue/bert-base/resolve/main/config.json HTTP/1.1" 200 0 +2024-05-27 20:15:46;[INFO];>>>>>>> Training Start! +2024-05-27 20:15:46;[INFO];[Now Epoch: 0] +2024-05-27 20:25:35;[INFO];*****eval metrics***** +2024-05-27 20:25:35;[INFO];eval_loss: 49.717087745666504 +2024-05-27 20:25:35;[INFO];eval_runtime: 0:00:30.348411 +2024-05-27 20:25:35;[INFO];eval_samples: 8 +2024-05-27 20:25:35;[INFO];eval_samples_per_second: 0:00:03.793551 +2024-05-27 20:25:35;[INFO];Aspect Accuracy: 0.55 +2024-05-27 20:25:35;[INFO];Aspect f1score micro : 0.55 +2024-05-27 20:25:35;[INFO];Aspect Accuracy Report: +2024-05-27 20:25:35;[INFO]; precision recall f1-score support + + B- 0.0000 0.0000 0.0000 11 + B- 0.0000 0.0000 0.0000 1 + B- 0.0000 0.0000 0.0000 1 + B-ƼƮ 0.0000 0.0000 0.0000 4 + B- 0.0000 0.0000 0.0000 2 + B-ܷǥ 0.0000 0.0000 0.0000 2 + B- 0.0000 0.0000 0.0000 2 + B-̽ 0.0000 0.0000 0.0000 2 + B-޴뼺 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 8 + I- 0.0000 0.0000 0.0000 2 + I- 0.0000 0.0000 0.0000 2 + I-ƼƮ 0.0000 0.0000 0.0000 7 + I- 0.0000 0.0000 0.0000 3 + I-ܷǥ 0.0000 0.0000 0.0000 1 + I- 0.0000 0.0000 0.0000 19 + I-̽ 0.0000 0.0000 0.0000 2 + I-޴뼺 0.0000 0.0000 0.0000 2 + O 0.6756 1.0000 0.8064 152 + + accuracy 0.6756 225 + macro avg 0.0356 0.0526 0.0424 225 +weighted avg 0.4564 0.6756 0.5447 225 + +2024-05-27 20:25:36;[INFO];Train Loss = 70.5297135412693 Valid Loss = 49.717087745666504 +2024-05-27 20:25:38;[INFO];[Now Epoch: 1] diff --git a/ocr_tagging/resources_ocr/data/test/output.csv b/ocr_tagging/resources_ocr/data/test/output.csv deleted file mode 100644 index 0317d73..0000000 --- a/ocr_tagging/resources_ocr/data/test/output.csv +++ /dev/null @@ -1,715 +0,0 @@ -Sentence #,Word,,Aspect -Sentence 1,일단,,O -Sentence 1,배터리는,,O -Sentence 1,잔량없이,,O -Sentence 1,와서,,O -Sentence 1,충전하고,,O -Sentence 1,있어서,,O -Sentence 1,뭐라,,O -Sentence 1,후기쓸게,,O -Sentence 1,없구요,,O -Sentence 2,배송해준,,B-배송 -Sentence 2,롯데택배는,,I-배송 -Sentence 2,정말,,I-배송 -Sentence 2,직업의식이,,I-배송 -Sentence 2,없네요,,I-배송 -Sentence 3,마구,,B-배송 -Sentence 3,찌그러진,,I-배송 -Sentence 3,박스를,,I-배송 -Sentence 3,보는순간,,I-배송 -Sentence 3,배터리에,,I-배송 -Sentence 3,충격이,,I-배송 -Sentence 3,많이,,I-배송 -Sentence 3,갔을것,,I-배송 -Sentence 3,같아,,I-배송 -Sentence 3,걱정이,,I-배송 -Sentence 3,되네요,,I-배송 -Sentence 4,내용물,,B-배송 -Sentence 4,포장도,,I-배송 -Sentence 4,뽁뽁이를,,I-배송 -Sentence 4,한,,I-배송 -Sentence 4,번,,I-배송 -Sentence 4,감아,,I-배송 -Sentence 4,줬으면,,I-배송 -Sentence 4,좋았을텐데,,I-배송 -Sentence 4,그냥,,I-배송 -Sentence 4,배터리포장박스,,I-배송 -Sentence 4,그대로,,I-배송 -Sentence 4,들어있어서,,I-배송 -Sentence 4,충격,,I-배송 -Sentence 4,흡수,,I-배송 -Sentence 4,제대로,,I-배송 -Sentence 4,됐겠습니다,,I-배송 -Sentence 5,판매자님도,,B-배송 -Sentence 5,포장,,I-배송 -Sentence 5,신경써주시고,,I-배송 -Sentence 5,배달하시는,,I-배송 -Sentence 5,분도,,I-배송 -Sentence 5,조심좀,,I-배송 -Sentence 5,해주시면,,I-배송 -Sentence 5,좋겠어요,,I-배송 -Sentence 6,상품,,B-문의 -Sentence 6,상세설명에는,,I-문의 -Sentence 6,10000mAh라고,,I-문의 -Sentence 6,적혀있는데,,I-문의 -Sentence 6,상품,,I-문의 -Sentence 6,박스에는,,I-문의 -Sentence 6,5000mAh라고,,I-문의 -Sentence 6,적혀있어서,,I-문의 -Sentence 6,판매처에,,I-문의 -Sentence 6,문의하니,,I-문의 -Sentence 6,일반충전시에는,,I-문의 -Sentence 6,10000이고,,I-문의 -Sentence 6,고속충전시에는,,I-문의 -Sentence 6,5000이라네요,,I-문의 -Sentence 7,전공하는,,B-충전 -Sentence 7,사람에게,,I-충전 -Sentence 7,물어보니,,I-충전 -Sentence 7,처음,,I-충전 -Sentence 7,듣는,,I-충전 -Sentence 7,이야기라고,,I-충전 -Sentence 7,하는데충전후,,I-충전 -Sentence 7,사용해보니,,I-충전 -Sentence 7,이상은,,I-충전 -Sentence 7,없는것,,I-충전 -Sentence 7,같아,,I-충전 -Sentence 7,그냥,,I-충전 -Sentence 7,사용하기는,,I-충전 -Sentence 7,하지만,,I-충전 -Sentence 7,찜찜한,,I-충전 -Sentence 7,마음은,,I-충전 -Sentence 7,사라지지,,I-충전 -Sentence 7,않아요,,I-충전 -Sentence 8,1,,B-무게 -Sentence 8,아주,,I-무게 -Sentence 8,무겁습니다,,I-무게 -Sentence 9,처음에는,,B-무게 -Sentence 9,괜찮았는데,,I-무게 -Sentence 9,2시간,,I-무게 -Sentence 9,넘어가니들고,,I-무게 -Sentence 9,다녔더니,,I-무게 -Sentence 9,허리가,,I-무게 -Sentence 9,부러질,,I-무게 -Sentence 9,것,,I-무게 -Sentence 9,같습니다,,I-무게 -Sentence 9,(주말에,,I-무게 -Sentence 9,친구들이랑,,I-무게 -Sentence 9,놀다가,,I-무게 -Sentence 9,결국,,I-무게 -Sentence 9,가벼운거,,I-무게 -Sentence 9,새거삼),,I-무게 -Sentence 10,2,,B-배터리용량 -Sentence 10,그,,I-배터리용량 -Sentence 10,외,,I-배터리용량 -Sentence 10,용량이,,I-배터리용량 -Sentence 10,커서,,I-배터리용량 -Sentence 10,여러번,,I-배터리용량 -Sentence 10,충전되는,,I-배터리용량 -Sentence 10,점과,,I-배터리용량 -Sentence 10,"디자인,",,B-디자인 -Sentence 10,색상,,I-디자인 -Sentence 10,등은,,I-디자인 -Sentence 10,마음에,,I-디자인 -Sentence 10,듭니다,,I-디자인 -Sentence 10,3,,B-휴대성 -Sentence 10,휴대용,,I-휴대성 -Sentence 10,여행용으로는,,I-휴대성 -Sentence 10,가벼운,,I-휴대성 -Sentence 10,저용량,,I-휴대성 -Sentence 10,제품이맞고,,I-휴대성 -Sentence 10,이제품은,,I-휴대성 -Sentence 10,이동수단을,,I-휴대성 -Sentence 10,타고,,I-휴대성 -Sentence 10,이동할때,,I-휴대성 -Sentence 10,충전용으로,,I-휴대성 -Sentence 10,알맞은것같습니다,,I-휴대성 -Sentence 11,잠깐,,O -Sentence 11,사용해봤는데,,O -Sentence 11,충전,,O -Sentence 11,빠릅니다,,O -Sentence 12,별점,,B-배송 -Sentence 12,뺀,,I-배송 -Sentence 12,이유는,,I-배송 -Sentence 12,완충제,,I-배송 -Sentence 12,없이,,I-배송 -Sentence 12,보내주셔서,,I-배송 -Sentence 12,오면서,,I-배송 -Sentence 12,상품에,,I-배송 -Sentence 12,충격이,,I-배송 -Sentence 12,많았을것,,I-배송 -Sentence 12,같습니다,,I-배송 -Sentence 13,"또,",,B-기타 -Sentence 13,배터리,,I-기타 -Sentence 13,외관에도,,I-기타 -Sentence 13,완전히,,I-기타 -Sentence 13,새거같지않은,,I-기타 -Sentence 13,느낌이,,I-기타 -Sentence 13,있습니다(미세한,,I-기타 -Sentence 13,긁힘,,I-기타 -Sentence 13,등),,I-기타 -Sentence 14,급히,,O -Sentence 14,필요하기도,,O -Sentence 14,하고,,O -Sentence 14,본연의,,O -Sentence 14,기능에는,,O -Sentence 14,문제가,,O -Sentence 14,없어,,O -Sentence 14,그냥,,O -Sentence 14,쓰려고하지만,,O -Sentence 14,아쉽습니다,,O -Sentence 15,참고하세요~,,O -Sentence 16,가격,,O -Sentence 16,다른,,O -Sentence 16,곳에,,O -Sentence 16,비해,,O -Sentence 16,저렴한,,O -Sentence 16,편인것,,O -Sentence 16,같아요,,O -Sentence 17,배송도,,B-배송 -Sentence 17,빠르고,,I-배송 -Sentence 17,안전하게,,I-배송 -Sentence 17,왔어요,,I-배송 -Sentence 18,충전은,,B-충전 -Sentence 18,확실하게,,I-충전 -Sentence 18,안정적으로,,I-충전 -Sentence 18,잘,,I-충전 -Sentence 18,됩니다,,I-충전 -Sentence 19,무게도,,B-무게 -Sentence 19,가볍습니다,,I-무게 -Sentence 20,겉표면에,,B-기타 -Sentence 20,더럽고,,I-기타 -Sentence 20,끈적거리는,,I-기타 -Sentence 20,테이프스티커,,I-기타 -Sentence 20,같은게,,I-기타 -Sentence 20,왜,,I-기타 -Sentence 20,붙어있는지,,I-기타 -Sentence 20,새제품,,I-기타 -Sentence 20,맞는지,,I-기타 -Sentence 20,의심스럽네,,I-기타 -Sentence 21,으레,,O -Sentence 21,보배가,,O -Sentence 21,그렇듯이(),,O -Sentence 21,선이,,O -Sentence 21,자꾸,,O -Sentence 21,빠져여,,O -Sentence 21,그리고,,O -Sentence 21,보통,,O -Sentence 21,폰이랑,,O -Sentence 21,보배랑,,O -Sentence 21,가만히,,O -Sentence 21,안두고,,O -Sentence 21,손으로,,O -Sentence 21,들기만하눈,,O -Sentence 22,데,,O -Sentence 22,됏다가안됏다가,,O -Sentence 22,성능,,B-배터리용량 -Sentence 22,자체는,,I-배터리용량 -Sentence 22,조아욤,,I-배터리용량 -Sentence 22,풀충,,I-배터리용량 -Sentence 22,두번정도,,I-배터리용량 -Sentence 22,할수잇고,,I-배터리용량 -Sentence 22,고속충전이라,,B-고속충전 -Sentence 22,충전두빠르구여,,I-고속충전 -Sentence 22,다만,,B-케이블 -Sentence 22,케이블선이좀아쉽네여,,I-케이블 -Sentence 23,이거,,O -Sentence 23,좀,,O -Sentence 23,이상합니다,,O -Sentence 24,제가,,O -Sentence 24,불량이,,O -Sentence 24,걸린건지,,O -Sentence 24,모르겠는데,,O -Sentence 24,분명,,O -Sentence 24,어제까지만해도,,O -Sentence 24,불3개가,,O -Sentence 24,들어와있었는데,,O -Sentence 24,오늘,,O -Sentence 24,사용하려고,,O -Sentence 24,핸드폰을,,O -Sentence 24,꽂았는데,,O -Sentence 24,배터리가,,O -Sentence 24,없답니다,,O -Sentence 25,그리고,,O -Sentence 25,배터리,,O -Sentence 25,자체도,,O -Sentence 25,충전이,,O -Sentence 25,엄청,,O -Sentence 25,느려요,,O -Sentence 26,왜그러는지,,O -Sentence 26,모르겠어요,,O -Sentence 27,고객센터,,O -Sentence 27,가기도,,O -Sentence 27,귀찮은데,,O -Sentence 27,",,,,",,O -Sentence 27,아오,,O -Sentence 28,제품,,B-기타 -Sentence 28,자체는,,I-기타 -Sentence 28,마음에,,I-기타 -Sentence 28,"드는데,",,I-기타 -Sentence 28,제조일자가,,I-기타 -Sentence 28,2019년이라서,,I-기타 -Sentence 28,2점,,I-기타 -Sentence 28,뺐습니다,,I-기타 -Sentence 29,좋은데,,B-그립감 -Sentence 29,미끄러워서,,I-그립감 -Sentence 29,사용하기,,I-그립감 -Sentence 29,불편해요,,I-그립감 -Sentence 29,ㅠㅠ,,I-그립감 -Sentence 30,제조연월일이,,B-기타 -Sentence 30,좀,,I-기타 -Sentence 30,오래,,I-기타 -Sentence 30,되어서,,I-기타 -Sentence 30,아쉬워요,,I-기타 -Sentence 31,무게감은,,B-무게 -Sentence 31,딱좋고,,I-무게 -Sentence 31,제폰에,,B-충전 -Sentence 31,충전하면,,I-충전 -Sentence 31,어쩌다한번,,I-충전 -Sentence 31,충전안되서,,I-충전 -Sentence 31,한두번,,I-충전 -Sentence 31,다시,,I-충전 -Sentence 31,꽂아줘야돼요,,I-충전 -Sentence 32,상품은,,O -Sentence 32,그냥,,O -Sentence 32,그래요,,O -Sentence 33,배송도,,B-배송 -Sentence 33,빠르고,,I-배송 -Sentence 33,작동도,,I-배송 -Sentence 33,잘되서,,I-배송 -Sentence 33,좋은데,,I-배송 -Sentence 33,가격이,,B-기타 -Sentence 33,저렴할때,,I-기타 -Sentence 33,예상했지만,,I-기타 -Sentence 33,제조일이,,I-기타 -Sentence 33,좀,,I-기타 -Sentence 33,지나서,,I-기타 -Sentence 33,배터리가,,I-기타 -Sentence 33,완전,,I-기타 -Sentence 33,방전나있던게,,I-기타 -Sentence 33,아쉽네요,,I-기타 -Sentence 34,휴대성좋고,,B-휴대성 -Sentence 34,깔끔해요,,B-디자인 -Sentence 35,연결을,,O -Sentence 35,안해도,,O -Sentence 35,배터리가,,O -Sentence 35,원래,,O -Sentence 35,빨리,,O -Sentence 35,줄어드나요,,O -Sentence 36,생각보다,,O -Sentence 36,빨리,,O -Sentence 36,없어져서,,O -Sentence 36,당황,,O -Sentence 36,센터가봐야하나,,O -Sentence 36,고민중입니다,,O -Sentence 37,디자인은,,B-디자인 -Sentence 37,별로충전은,,I-디자인 -Sentence 37,빠릅니다,,I-충전 -Sentence 38,휴대성이조아요,,B-휴대성 -Sentence 39,굿굿,,O -Sentence 40,깔끔한,,O -Sentence 40,상품감사합니다,,O -Sentence 41,택배박스안에,,B-배송 -Sentence 41,배터리팩만,,I-배송 -Sentence 41,있어서,,I-배송 -Sentence 41,아쉬워요,,I-배송 -Sentence 42,제품은,,O -Sentence 42,맘에,,O -Sentence 42,드나,,O -Sentence 42,포장이,,O -Sentence 42,뾱뾱이같은거,,O -Sentence 42,한장도,,O -Sentence 42,없이,,O -Sentence 42,그냥,,O -Sentence 42,담겨옴,,O -Sentence 43,충격에,,O -Sentence 43,예민한,,O -Sentence 43,배터린데,,O -Sentence 43,너무하네요,,O -Sentence 44,충전줄도,,B-케이블 -Sentence 44,짧아서,,I-케이블 -Sentence 44,좀,,I-케이블 -Sentence 44,아쉬워요,,I-케이블 -Sentence 45,포장이,,O -Sentence 45,아쉬웠어요,,O -Sentence 46,완충제하나없이,,O -Sentence 46,달랑,,O -Sentence 46,배터리만,,O -Sentence 46,있어서,,O -Sentence 46,받을때,,O -Sentence 46,"별로였네요,",,O -Sentence 46,다행이,,O -Sentence 46,아직까진,,O -Sentence 46,기능에,,O -Sentence 46,문제는,,O -Sentence 46,없지만요,,O -Sentence 47,배낭,,B-무게 -Sentence 47,여행을,,I-무게 -Sentence 47,위해,,I-무게 -Sentence 47,새,,I-무게 -Sentence 47,것,,I-무게 -Sentence 47,선택하였는데,,I-무게 -Sentence 47,생각했던,,I-무게 -Sentence 47,것,,I-무게 -Sentence 47,보다,,I-무게 -Sentence 47,좀,,I-무게 -Sentence 47,무겁네요,,I-무게 -Sentence 48,잘,,O -Sentence 48,쓰고,,O -Sentence 48,있습니다,,O -Sentence 49,고속,,O -Sentence 49,충전보조배터리,,O -Sentence 49,좋은,,O -Sentence 49,가격에,,O -Sentence 49,잘,,O -Sentence 49,구매했어요,,O -Sentence 50,새기계는,,O -Sentence 50,충전이,,O -Sentence 50,원래,,O -Sentence 50,안,,O -Sentence 50,되있나요,,O -Sentence 51,고장난,,O -Sentence 51,건,,O -Sentence 51,줄ㅎ,,O -Sentence 51,괜찮아요,,O -Sentence 52,배송은,,B-배송 -Sentence 52,빠르나,,I-배송 -Sentence 52,제품,,B-AS -Sentence 52,충전이,,I-AS -Sentence 52,안되서,,I-AS -Sentence 52,받자마자,,I-AS -Sentence 52,As갔다왔네요,,I-AS -Sentence 53,배송,,B-배송 -Sentence 53,빠르고,,I-배송 -Sentence 53,디자인도,,B-디자인 -Sentence 53,무난한,,I-디자인 -Sentence 53,것,,I-디자인 -Sentence 53,같습니다,,I-디자인 -Sentence 54,그럭저럭,,B-기타 -Sentence 54,괜찮은,,I-기타 -Sentence 54,듯,,I-기타 -Sentence 54,한데,,I-기타 -Sentence 54,가격이,,I-기타 -Sentence 54,비싼,,I-기타 -Sentence 54,듯,,I-기타 -Sentence 55,충전,,O -Sentence 55,문제없이,,O -Sentence 55,잘되서,,O -Sentence 55,만족해요,,O -Sentence 56,생각보다,,O -Sentence 56,많이,,O -Sentence 56,괜찮아요,,O -Sentence 57,삼성,,O -Sentence 57,휴대폰에서,,O -Sentence 57,최적화된,,O -Sentence 57,거,,O -Sentence 57,같습니다,,O -Sentence 58,그런데,,O -Sentence 58,생각보다,,O -Sentence 58,충전하는데,,O -Sentence 58,시간이,,O -Sentence 58,좀,,O -Sentence 58,걸리는,,O -Sentence 58,거,,O -Sentence 58,같습니다;;;,,O -Sentence 59,무조건,,B-배송 -Sentence 59,5점,,I-배송 -Sentence 59,주는데,,I-배송 -Sentence 59,뾱뾱이도없고,,I-배송 -Sentence 59,에어쿠션도,,I-배송 -Sentence 59,없어서,,I-배송 -Sentence 59,3점드려요,,I-배송 -Sentence 60,이렇게,,O -Sentence 60,파시면,,O -Sentence 60,몇백원,,O -Sentence 60,아끼려다,,O -Sentence 60,몇십만원,,O -Sentence 60,손해봅니다,,O -Sentence 61,사장님,,O -Sentence 62,뽁뽁이,,O -Sentence 62,같은거,,O -Sentence 62,없이,,O -Sentence 62,박스에,,O -Sentence 62,상품만,,O -Sentence 62,왔어요,,O -Sentence 63,전자제품인데,,O -Sentence 63,택배기사님은,,O -Sentence 63,문앞에,,O -Sentence 63,물건,,O -Sentence 63,던져버리구요,,O -Sentence 64,상자에,,O -Sentence 64,배터리본품만,,O -Sentence 64,달랑,,O -Sentence 64,들어와있어요,,O -Sentence 65,충진재도없이,,O -Sentence 66,아이폰용으로,,B-호환성 -Sentence 66,구입했는데,,I-호환성 -Sentence 66,usb용이,,I-호환성 -Sentence 66,없네요ㅜㅜ,,I-호환성 -Sentence 67,제품은,,O -Sentence 67,브랜드라,,O -Sentence 67,좋으나,,O -Sentence 67,포장이,,O -Sentence 67,꽝입니다,,O -Sentence 68,제품포장도,,O -Sentence 68,정말,,O -Sentence 68,얇은데,,O -Sentence 68,뽁뽁이,,O -Sentence 68,한장,,O -Sentence 68,없이,,O -Sentence 68,덜렁,,O -Sentence 68,들어있더군요,,O -Sentence 69,걍,,O -Sentence 69,심플해요,,O -Sentence 69,케이블도주고,,O -Sentence 69,많이,,O -Sentence 69,파세요^^,,O -Sentence 70,배송,,O -Sentence 70,잘오고,,O -Sentence 70,제품,,O -Sentence 70,괜찮은거같아요,,O -Sentence 71,상품은,,O -Sentence 71,좋은데,,O -Sentence 71,배송이,,O -Sentence 71,너무느려요,,O -Sentence 72,좋은데,,O -Sentence 72,너무삐싼,,O -Sentence 72,삼성,,O -Sentence 72,보조배터리,,O -Sentence 73,제조일자,,O -Sentence 73,보고,,O -Sentence 73,깜놀했네요,,O -Sentence 74,나름,,O -Sentence 74,좋게,,O -Sentence 74,구입건승하시길,,O -Sentence 75,삼성정품,,O -Sentence 75,배터리,,O -Sentence 75,비싸요,,O -Sentence 76,2019년,,O -Sentence 76,3월,,O -Sentence 76,제조품입니다,,O -Sentence 77,생각보자는보통입니다,,O -Sentence 78,조아요,,O -Sentence 79,조아요조아요ㅋ,,O -Sentence 80,휴대용,,O -Sentence 80,보조바데리가,,O -Sentence 80,필요해서,,O -Sentence 80,인터넷,,O -Sentence 80,쇼핑하다가,,O -Sentence 80,디자인이,,O -Sentence 80,심플해서,,O -Sentence 80,구매했는데,,O -Sentence 80,받아보니,,O -Sentence 80,"맘에듭니다,",,O -Sentence 80,다만,,O -Sentence 80,충전연결,,O -Sentence 80,선을,,O -Sentence 80,함께,,O -Sentence 80,세트로,,O -Sentence 80,판매하면,,O -Sentence 80,"더좋겠네요,",,O -Sentence 80,별도구매라,,O -Sentence 80,아쉽네요,,O -Sentence 81,가볍고,,O -Sentence 81,디자인괜찮은데,,O -Sentence 81,생각보다,,O -Sentence 81,충전이,,O -Sentence 81,느려요,,O -Sentence 82,크고,,O -Sentence 82,무게감이,,O -Sentence 82,있네요,,O -Sentence 83,충전은,,O -Sentence 83,잘되네요,,O -Sentence 84,충전케이블에,,O -Sentence 84,따라,,O -Sentence 84,충전이,,O -Sentence 84,될때도,,O -Sentence 84,있고,,O -Sentence 84,안될때도,,O -Sentence 84,있고,,O -Sentence 84,귀찮아서,,O -Sentence 84,그냥,,O -Sentence 84,쓰고,,O -Sentence 84,있어요,,O -Sentence 85,충전시간도,,O -Sentence 85,어쩔때는,,O -Sentence 85,하루,,O -Sentence 85,종일,,O -Sentence 85,걸리고,,O -Sentence 85,두께도,,O -Sentence 85,얇고,,O -Sentence 85,모양도,,O -Sentence 85,좋은데,,O -Sentence 85,아무래도,,O -Sentence 85,뽑기를,,O -Sentence 85,잘못한듯ㅠㅠ,,O -Sentence 86,그립감이,,O -Sentence 86,좋고,,O -Sentence 86,충전기는,,O -Sentence 86,다른것과,,O -Sentence 86,차이점은,,O -Sentence 86,모르겠어요,,O -Sentence 87,잘때,,O -Sentence 87,충전안해두고자서,,O -Sentence 87,아침이면항상,,O -Sentence 87,앵꼬라,,O -Sentence 87,충전시키느라,,O -Sentence 87,폰볼일포는데,,O -Sentence 87,좋아요,,O -Sentence 88,배송,,O -Sentence 88,빠르고,,O -Sentence 88,좋아요,,O -Sentence 89,삼성이니,,O -Sentence 89,별세개,,O -Sentence 90,재질이,,O -Sentence 90,좋네요,,O -Sentence 91,더써봐야,,O -Sentence 91,알것같아요,,O -Sentence 92,다음에,,O -Sentence 92,또,,O -Sentence 92,주문하겠습니다,,O -Sentence 93,처음에는,,O -Sentence 93,속도잘나오다,,O -Sentence 93,생각보다,,O -Sentence 93,충전속도가,,O -Sentence 93,빠르진않네요,,O -Sentence 94,조금,,O -Sentence 94,무거운,,O -Sentence 94,느낌이,,O -Sentence 94,주머니가,,O -Sentence 94,부담스러워하네,,O -Sentence 95,디자인은,,O -Sentence 95,예뿐데,,O -Sentence 95,유에스비,,O -Sentence 95,없는것도,,O -Sentence 95,조금,,O -Sentence 95,부담스럽고,,O -Sentence 96,충전속도도,,O -Sentence 96,빠는데,,O -Sentence 96,핸드폰외의,,O -Sentence 96,다른기기에,,O -Sentence 96,충전이,,O -Sentence 96,안돼요,,O -Sentence 97,상품은,,O -Sentence 97,만족합니다만,,O -Sentence 97,배송지연이,,O -Sentence 97,있어서,,O -Sentence 97,좀,,O -Sentence 97,아쉽습니다^^,,O -Sentence 98,좀만,,O -Sentence 98,더,,O -Sentence 98,가벼우면,,O -Sentence 98,좋겠습니다,,O -Sentence 99,디자인이,,O -Sentence 99,예쁩니다,,O -Sentence 100,빠른배송,,O -Sentence 100,감사합니다,,O -Sentence 101,초고속이라더니,,O -Sentence 101,아닌것같네요,,O -Sentence 102,충전속도가,,O -Sentence 102,느린것같네요,,O -Sentence 103,너무,,O -Sentence 103,만족합니다,,O -Sentence 104,삼성정품만,,O -Sentence 104,저는,,O -Sentence 104,사용합니다,,O -Sentence 105,친구꺼빌려써봤다,,O -Sentence 106,좋아서,,O -Sentence 106,구매해봤는데국내판매제품이아닌가스티커가많이붙여져있네요,,O -Sentence 107,배송빨라서,,O -Sentence 107,좋지만,,O -Sentence 107,제품,,O -Sentence 107,표면에,,O -Sentence 107,찐득이,,O -Sentence 107,같은게,,O -Sentence 107,좀,,O -Sentence 107,묻어있어서,,O -Sentence 107,새제품이,,O -Sentence 107,아닌건지,,O -Sentence 107,좀,,O -Sentence 107,찜찜합니다,,O -Sentence 108,완충이,,O -Sentence 108,안돼요,,O -Sentence 109,하루종일,,O -Sentence 109,충전해도,,O -Sentence 109,세칸에서,,O -Sentence 109,넘어가질,,O -Sentence 109,않네요,,O -Sentence 110,2개를,,O -Sentence 110,"구매하였는데,",,O -Sentence 110,하나는,,O -Sentence 110,갑자기,,O -Sentence 110,방전이,,O -Sentence 110,되는,,O -Sentence 110,듯,,O -Sentence 110,해요,,O -Sentence 111,보조배터리는,,O -Sentence 111,정품으로,,O -Sentence 111,사용하시는걸,,O -Sentence 111,권해드려요~,,O -Sentence 112,무식하고,,O -Sentence 112,투박하게,,O -Sentence 112,큽니다,,O -Sentence 113,태블릿,,O -Sentence 113,충전용으로,,O -Sentence 113,삼,,O -Sentence 114,배송은,,O -Sentence 114,빠른데,,O -Sentence 114,초고속충전은,,O -Sentence 114,아니네요,,O -Sentence 115,삼성꺼라,,O -Sentence 115,믿고,,O -Sentence 115,샀어요,,O -Sentence 116,배송이,,O -Sentence 116,빨라요,,O -Sentence 117,충전,,O -Sentence 117,연결,,O -Sentence 117,케이블이,,O -Sentence 117,너무,,O -Sentence 117,짧으무,,O -Sentence 117,ㅜ,,O -Sentence 118,포장,,O -Sentence 118,잘되어,,O -Sentence 118,빠르게,,O -Sentence 118,왔어요,,O -Sentence 119,잘,,O -Sentence 119,맞아요,,O -Sentence 120,깔끔합니다,,O -Sentence 121,그럭저럭쓸만합니다,,O -Sentence 122,it',,O -Sentence 122,s,,O -Sentence 122,okay,,O -Sentence 123,1박2일용,,O -Sentence 123,정도,,O -Sentence 123,용량입니다,,O -Sentence 124,보조배터리에서,,O -Sentence 124,휴대폰,,O -Sentence 124,충전시,,O -Sentence 124,보조배터리의,,O -Sentence 124,잔량표시가,,O -Sentence 124,없어서,,O -Sentence 124,불편함,,O -Sentence 125,바깥에서,,O -Sentence 125,쿵하는,,O -Sentence 125,소리에,,O -Sentence 125,택배인가,,O -Sentence 125,싶어,,O -Sentence 125,나갔는데,,O -Sentence 125,완충재없이,,O -Sentence 125,포장이ㅠㅠ신경쓰시길~~~되는지는,,O -Sentence 125,확인해봐야겠네요ㅜㅜ,,O -Sentence 126,너무,,O -Sentence 126,무거워요,,O -Sentence 126,ㅠㅠ,,O -Sentence 126,쫌ㅜㅜ,,O -Sentence 127,충전시간이나,,O -Sentence 127,사용시간은,,O -Sentence 127,보통,,O -Sentence 127,만족이나,,O -Sentence 127,색상,,O -Sentence 127,선택,,O -Sentence 127,한정은,,O -Sentence 127,매우,,O -Sentence 127,불만족,,O -Sentence 128,좀,,O -Sentence 128,무거워요,,O -Sentence 128,ㅜㅜㅜ,,O diff --git a/ocr_tagging/resources_ocr/data/train/output.csv b/ocr_tagging/resources_ocr/data/train/output.csv deleted file mode 100644 index 0317d73..0000000 --- a/ocr_tagging/resources_ocr/data/train/output.csv +++ /dev/null @@ -1,715 +0,0 @@ -Sentence #,Word,,Aspect -Sentence 1,일단,,O -Sentence 1,배터리는,,O -Sentence 1,잔량없이,,O -Sentence 1,와서,,O -Sentence 1,충전하고,,O -Sentence 1,있어서,,O -Sentence 1,뭐라,,O -Sentence 1,후기쓸게,,O -Sentence 1,없구요,,O -Sentence 2,배송해준,,B-배송 -Sentence 2,롯데택배는,,I-배송 -Sentence 2,정말,,I-배송 -Sentence 2,직업의식이,,I-배송 -Sentence 2,없네요,,I-배송 -Sentence 3,마구,,B-배송 -Sentence 3,찌그러진,,I-배송 -Sentence 3,박스를,,I-배송 -Sentence 3,보는순간,,I-배송 -Sentence 3,배터리에,,I-배송 -Sentence 3,충격이,,I-배송 -Sentence 3,많이,,I-배송 -Sentence 3,갔을것,,I-배송 -Sentence 3,같아,,I-배송 -Sentence 3,걱정이,,I-배송 -Sentence 3,되네요,,I-배송 -Sentence 4,내용물,,B-배송 -Sentence 4,포장도,,I-배송 -Sentence 4,뽁뽁이를,,I-배송 -Sentence 4,한,,I-배송 -Sentence 4,번,,I-배송 -Sentence 4,감아,,I-배송 -Sentence 4,줬으면,,I-배송 -Sentence 4,좋았을텐데,,I-배송 -Sentence 4,그냥,,I-배송 -Sentence 4,배터리포장박스,,I-배송 -Sentence 4,그대로,,I-배송 -Sentence 4,들어있어서,,I-배송 -Sentence 4,충격,,I-배송 -Sentence 4,흡수,,I-배송 -Sentence 4,제대로,,I-배송 -Sentence 4,됐겠습니다,,I-배송 -Sentence 5,판매자님도,,B-배송 -Sentence 5,포장,,I-배송 -Sentence 5,신경써주시고,,I-배송 -Sentence 5,배달하시는,,I-배송 -Sentence 5,분도,,I-배송 -Sentence 5,조심좀,,I-배송 -Sentence 5,해주시면,,I-배송 -Sentence 5,좋겠어요,,I-배송 -Sentence 6,상품,,B-문의 -Sentence 6,상세설명에는,,I-문의 -Sentence 6,10000mAh라고,,I-문의 -Sentence 6,적혀있는데,,I-문의 -Sentence 6,상품,,I-문의 -Sentence 6,박스에는,,I-문의 -Sentence 6,5000mAh라고,,I-문의 -Sentence 6,적혀있어서,,I-문의 -Sentence 6,판매처에,,I-문의 -Sentence 6,문의하니,,I-문의 -Sentence 6,일반충전시에는,,I-문의 -Sentence 6,10000이고,,I-문의 -Sentence 6,고속충전시에는,,I-문의 -Sentence 6,5000이라네요,,I-문의 -Sentence 7,전공하는,,B-충전 -Sentence 7,사람에게,,I-충전 -Sentence 7,물어보니,,I-충전 -Sentence 7,처음,,I-충전 -Sentence 7,듣는,,I-충전 -Sentence 7,이야기라고,,I-충전 -Sentence 7,하는데충전후,,I-충전 -Sentence 7,사용해보니,,I-충전 -Sentence 7,이상은,,I-충전 -Sentence 7,없는것,,I-충전 -Sentence 7,같아,,I-충전 -Sentence 7,그냥,,I-충전 -Sentence 7,사용하기는,,I-충전 -Sentence 7,하지만,,I-충전 -Sentence 7,찜찜한,,I-충전 -Sentence 7,마음은,,I-충전 -Sentence 7,사라지지,,I-충전 -Sentence 7,않아요,,I-충전 -Sentence 8,1,,B-무게 -Sentence 8,아주,,I-무게 -Sentence 8,무겁습니다,,I-무게 -Sentence 9,처음에는,,B-무게 -Sentence 9,괜찮았는데,,I-무게 -Sentence 9,2시간,,I-무게 -Sentence 9,넘어가니들고,,I-무게 -Sentence 9,다녔더니,,I-무게 -Sentence 9,허리가,,I-무게 -Sentence 9,부러질,,I-무게 -Sentence 9,것,,I-무게 -Sentence 9,같습니다,,I-무게 -Sentence 9,(주말에,,I-무게 -Sentence 9,친구들이랑,,I-무게 -Sentence 9,놀다가,,I-무게 -Sentence 9,결국,,I-무게 -Sentence 9,가벼운거,,I-무게 -Sentence 9,새거삼),,I-무게 -Sentence 10,2,,B-배터리용량 -Sentence 10,그,,I-배터리용량 -Sentence 10,외,,I-배터리용량 -Sentence 10,용량이,,I-배터리용량 -Sentence 10,커서,,I-배터리용량 -Sentence 10,여러번,,I-배터리용량 -Sentence 10,충전되는,,I-배터리용량 -Sentence 10,점과,,I-배터리용량 -Sentence 10,"디자인,",,B-디자인 -Sentence 10,색상,,I-디자인 -Sentence 10,등은,,I-디자인 -Sentence 10,마음에,,I-디자인 -Sentence 10,듭니다,,I-디자인 -Sentence 10,3,,B-휴대성 -Sentence 10,휴대용,,I-휴대성 -Sentence 10,여행용으로는,,I-휴대성 -Sentence 10,가벼운,,I-휴대성 -Sentence 10,저용량,,I-휴대성 -Sentence 10,제품이맞고,,I-휴대성 -Sentence 10,이제품은,,I-휴대성 -Sentence 10,이동수단을,,I-휴대성 -Sentence 10,타고,,I-휴대성 -Sentence 10,이동할때,,I-휴대성 -Sentence 10,충전용으로,,I-휴대성 -Sentence 10,알맞은것같습니다,,I-휴대성 -Sentence 11,잠깐,,O -Sentence 11,사용해봤는데,,O -Sentence 11,충전,,O -Sentence 11,빠릅니다,,O -Sentence 12,별점,,B-배송 -Sentence 12,뺀,,I-배송 -Sentence 12,이유는,,I-배송 -Sentence 12,완충제,,I-배송 -Sentence 12,없이,,I-배송 -Sentence 12,보내주셔서,,I-배송 -Sentence 12,오면서,,I-배송 -Sentence 12,상품에,,I-배송 -Sentence 12,충격이,,I-배송 -Sentence 12,많았을것,,I-배송 -Sentence 12,같습니다,,I-배송 -Sentence 13,"또,",,B-기타 -Sentence 13,배터리,,I-기타 -Sentence 13,외관에도,,I-기타 -Sentence 13,완전히,,I-기타 -Sentence 13,새거같지않은,,I-기타 -Sentence 13,느낌이,,I-기타 -Sentence 13,있습니다(미세한,,I-기타 -Sentence 13,긁힘,,I-기타 -Sentence 13,등),,I-기타 -Sentence 14,급히,,O -Sentence 14,필요하기도,,O -Sentence 14,하고,,O -Sentence 14,본연의,,O -Sentence 14,기능에는,,O -Sentence 14,문제가,,O -Sentence 14,없어,,O -Sentence 14,그냥,,O -Sentence 14,쓰려고하지만,,O -Sentence 14,아쉽습니다,,O -Sentence 15,참고하세요~,,O -Sentence 16,가격,,O -Sentence 16,다른,,O -Sentence 16,곳에,,O -Sentence 16,비해,,O -Sentence 16,저렴한,,O -Sentence 16,편인것,,O -Sentence 16,같아요,,O -Sentence 17,배송도,,B-배송 -Sentence 17,빠르고,,I-배송 -Sentence 17,안전하게,,I-배송 -Sentence 17,왔어요,,I-배송 -Sentence 18,충전은,,B-충전 -Sentence 18,확실하게,,I-충전 -Sentence 18,안정적으로,,I-충전 -Sentence 18,잘,,I-충전 -Sentence 18,됩니다,,I-충전 -Sentence 19,무게도,,B-무게 -Sentence 19,가볍습니다,,I-무게 -Sentence 20,겉표면에,,B-기타 -Sentence 20,더럽고,,I-기타 -Sentence 20,끈적거리는,,I-기타 -Sentence 20,테이프스티커,,I-기타 -Sentence 20,같은게,,I-기타 -Sentence 20,왜,,I-기타 -Sentence 20,붙어있는지,,I-기타 -Sentence 20,새제품,,I-기타 -Sentence 20,맞는지,,I-기타 -Sentence 20,의심스럽네,,I-기타 -Sentence 21,으레,,O -Sentence 21,보배가,,O -Sentence 21,그렇듯이(),,O -Sentence 21,선이,,O -Sentence 21,자꾸,,O -Sentence 21,빠져여,,O -Sentence 21,그리고,,O -Sentence 21,보통,,O -Sentence 21,폰이랑,,O -Sentence 21,보배랑,,O -Sentence 21,가만히,,O -Sentence 21,안두고,,O -Sentence 21,손으로,,O -Sentence 21,들기만하눈,,O -Sentence 22,데,,O -Sentence 22,됏다가안됏다가,,O -Sentence 22,성능,,B-배터리용량 -Sentence 22,자체는,,I-배터리용량 -Sentence 22,조아욤,,I-배터리용량 -Sentence 22,풀충,,I-배터리용량 -Sentence 22,두번정도,,I-배터리용량 -Sentence 22,할수잇고,,I-배터리용량 -Sentence 22,고속충전이라,,B-고속충전 -Sentence 22,충전두빠르구여,,I-고속충전 -Sentence 22,다만,,B-케이블 -Sentence 22,케이블선이좀아쉽네여,,I-케이블 -Sentence 23,이거,,O -Sentence 23,좀,,O -Sentence 23,이상합니다,,O -Sentence 24,제가,,O -Sentence 24,불량이,,O -Sentence 24,걸린건지,,O -Sentence 24,모르겠는데,,O -Sentence 24,분명,,O -Sentence 24,어제까지만해도,,O -Sentence 24,불3개가,,O -Sentence 24,들어와있었는데,,O -Sentence 24,오늘,,O -Sentence 24,사용하려고,,O -Sentence 24,핸드폰을,,O -Sentence 24,꽂았는데,,O -Sentence 24,배터리가,,O -Sentence 24,없답니다,,O -Sentence 25,그리고,,O -Sentence 25,배터리,,O -Sentence 25,자체도,,O -Sentence 25,충전이,,O -Sentence 25,엄청,,O -Sentence 25,느려요,,O -Sentence 26,왜그러는지,,O -Sentence 26,모르겠어요,,O -Sentence 27,고객센터,,O -Sentence 27,가기도,,O -Sentence 27,귀찮은데,,O -Sentence 27,",,,,",,O -Sentence 27,아오,,O -Sentence 28,제품,,B-기타 -Sentence 28,자체는,,I-기타 -Sentence 28,마음에,,I-기타 -Sentence 28,"드는데,",,I-기타 -Sentence 28,제조일자가,,I-기타 -Sentence 28,2019년이라서,,I-기타 -Sentence 28,2점,,I-기타 -Sentence 28,뺐습니다,,I-기타 -Sentence 29,좋은데,,B-그립감 -Sentence 29,미끄러워서,,I-그립감 -Sentence 29,사용하기,,I-그립감 -Sentence 29,불편해요,,I-그립감 -Sentence 29,ㅠㅠ,,I-그립감 -Sentence 30,제조연월일이,,B-기타 -Sentence 30,좀,,I-기타 -Sentence 30,오래,,I-기타 -Sentence 30,되어서,,I-기타 -Sentence 30,아쉬워요,,I-기타 -Sentence 31,무게감은,,B-무게 -Sentence 31,딱좋고,,I-무게 -Sentence 31,제폰에,,B-충전 -Sentence 31,충전하면,,I-충전 -Sentence 31,어쩌다한번,,I-충전 -Sentence 31,충전안되서,,I-충전 -Sentence 31,한두번,,I-충전 -Sentence 31,다시,,I-충전 -Sentence 31,꽂아줘야돼요,,I-충전 -Sentence 32,상품은,,O -Sentence 32,그냥,,O -Sentence 32,그래요,,O -Sentence 33,배송도,,B-배송 -Sentence 33,빠르고,,I-배송 -Sentence 33,작동도,,I-배송 -Sentence 33,잘되서,,I-배송 -Sentence 33,좋은데,,I-배송 -Sentence 33,가격이,,B-기타 -Sentence 33,저렴할때,,I-기타 -Sentence 33,예상했지만,,I-기타 -Sentence 33,제조일이,,I-기타 -Sentence 33,좀,,I-기타 -Sentence 33,지나서,,I-기타 -Sentence 33,배터리가,,I-기타 -Sentence 33,완전,,I-기타 -Sentence 33,방전나있던게,,I-기타 -Sentence 33,아쉽네요,,I-기타 -Sentence 34,휴대성좋고,,B-휴대성 -Sentence 34,깔끔해요,,B-디자인 -Sentence 35,연결을,,O -Sentence 35,안해도,,O -Sentence 35,배터리가,,O -Sentence 35,원래,,O -Sentence 35,빨리,,O -Sentence 35,줄어드나요,,O -Sentence 36,생각보다,,O -Sentence 36,빨리,,O -Sentence 36,없어져서,,O -Sentence 36,당황,,O -Sentence 36,센터가봐야하나,,O -Sentence 36,고민중입니다,,O -Sentence 37,디자인은,,B-디자인 -Sentence 37,별로충전은,,I-디자인 -Sentence 37,빠릅니다,,I-충전 -Sentence 38,휴대성이조아요,,B-휴대성 -Sentence 39,굿굿,,O -Sentence 40,깔끔한,,O -Sentence 40,상품감사합니다,,O -Sentence 41,택배박스안에,,B-배송 -Sentence 41,배터리팩만,,I-배송 -Sentence 41,있어서,,I-배송 -Sentence 41,아쉬워요,,I-배송 -Sentence 42,제품은,,O -Sentence 42,맘에,,O -Sentence 42,드나,,O -Sentence 42,포장이,,O -Sentence 42,뾱뾱이같은거,,O -Sentence 42,한장도,,O -Sentence 42,없이,,O -Sentence 42,그냥,,O -Sentence 42,담겨옴,,O -Sentence 43,충격에,,O -Sentence 43,예민한,,O -Sentence 43,배터린데,,O -Sentence 43,너무하네요,,O -Sentence 44,충전줄도,,B-케이블 -Sentence 44,짧아서,,I-케이블 -Sentence 44,좀,,I-케이블 -Sentence 44,아쉬워요,,I-케이블 -Sentence 45,포장이,,O -Sentence 45,아쉬웠어요,,O -Sentence 46,완충제하나없이,,O -Sentence 46,달랑,,O -Sentence 46,배터리만,,O -Sentence 46,있어서,,O -Sentence 46,받을때,,O -Sentence 46,"별로였네요,",,O -Sentence 46,다행이,,O -Sentence 46,아직까진,,O -Sentence 46,기능에,,O -Sentence 46,문제는,,O -Sentence 46,없지만요,,O -Sentence 47,배낭,,B-무게 -Sentence 47,여행을,,I-무게 -Sentence 47,위해,,I-무게 -Sentence 47,새,,I-무게 -Sentence 47,것,,I-무게 -Sentence 47,선택하였는데,,I-무게 -Sentence 47,생각했던,,I-무게 -Sentence 47,것,,I-무게 -Sentence 47,보다,,I-무게 -Sentence 47,좀,,I-무게 -Sentence 47,무겁네요,,I-무게 -Sentence 48,잘,,O -Sentence 48,쓰고,,O -Sentence 48,있습니다,,O -Sentence 49,고속,,O -Sentence 49,충전보조배터리,,O -Sentence 49,좋은,,O -Sentence 49,가격에,,O -Sentence 49,잘,,O -Sentence 49,구매했어요,,O -Sentence 50,새기계는,,O -Sentence 50,충전이,,O -Sentence 50,원래,,O -Sentence 50,안,,O -Sentence 50,되있나요,,O -Sentence 51,고장난,,O -Sentence 51,건,,O -Sentence 51,줄ㅎ,,O -Sentence 51,괜찮아요,,O -Sentence 52,배송은,,B-배송 -Sentence 52,빠르나,,I-배송 -Sentence 52,제품,,B-AS -Sentence 52,충전이,,I-AS -Sentence 52,안되서,,I-AS -Sentence 52,받자마자,,I-AS -Sentence 52,As갔다왔네요,,I-AS -Sentence 53,배송,,B-배송 -Sentence 53,빠르고,,I-배송 -Sentence 53,디자인도,,B-디자인 -Sentence 53,무난한,,I-디자인 -Sentence 53,것,,I-디자인 -Sentence 53,같습니다,,I-디자인 -Sentence 54,그럭저럭,,B-기타 -Sentence 54,괜찮은,,I-기타 -Sentence 54,듯,,I-기타 -Sentence 54,한데,,I-기타 -Sentence 54,가격이,,I-기타 -Sentence 54,비싼,,I-기타 -Sentence 54,듯,,I-기타 -Sentence 55,충전,,O -Sentence 55,문제없이,,O -Sentence 55,잘되서,,O -Sentence 55,만족해요,,O -Sentence 56,생각보다,,O -Sentence 56,많이,,O -Sentence 56,괜찮아요,,O -Sentence 57,삼성,,O -Sentence 57,휴대폰에서,,O -Sentence 57,최적화된,,O -Sentence 57,거,,O -Sentence 57,같습니다,,O -Sentence 58,그런데,,O -Sentence 58,생각보다,,O -Sentence 58,충전하는데,,O -Sentence 58,시간이,,O -Sentence 58,좀,,O -Sentence 58,걸리는,,O -Sentence 58,거,,O -Sentence 58,같습니다;;;,,O -Sentence 59,무조건,,B-배송 -Sentence 59,5점,,I-배송 -Sentence 59,주는데,,I-배송 -Sentence 59,뾱뾱이도없고,,I-배송 -Sentence 59,에어쿠션도,,I-배송 -Sentence 59,없어서,,I-배송 -Sentence 59,3점드려요,,I-배송 -Sentence 60,이렇게,,O -Sentence 60,파시면,,O -Sentence 60,몇백원,,O -Sentence 60,아끼려다,,O -Sentence 60,몇십만원,,O -Sentence 60,손해봅니다,,O -Sentence 61,사장님,,O -Sentence 62,뽁뽁이,,O -Sentence 62,같은거,,O -Sentence 62,없이,,O -Sentence 62,박스에,,O -Sentence 62,상품만,,O -Sentence 62,왔어요,,O -Sentence 63,전자제품인데,,O -Sentence 63,택배기사님은,,O -Sentence 63,문앞에,,O -Sentence 63,물건,,O -Sentence 63,던져버리구요,,O -Sentence 64,상자에,,O -Sentence 64,배터리본품만,,O -Sentence 64,달랑,,O -Sentence 64,들어와있어요,,O -Sentence 65,충진재도없이,,O -Sentence 66,아이폰용으로,,B-호환성 -Sentence 66,구입했는데,,I-호환성 -Sentence 66,usb용이,,I-호환성 -Sentence 66,없네요ㅜㅜ,,I-호환성 -Sentence 67,제품은,,O -Sentence 67,브랜드라,,O -Sentence 67,좋으나,,O -Sentence 67,포장이,,O -Sentence 67,꽝입니다,,O -Sentence 68,제품포장도,,O -Sentence 68,정말,,O -Sentence 68,얇은데,,O -Sentence 68,뽁뽁이,,O -Sentence 68,한장,,O -Sentence 68,없이,,O -Sentence 68,덜렁,,O -Sentence 68,들어있더군요,,O -Sentence 69,걍,,O -Sentence 69,심플해요,,O -Sentence 69,케이블도주고,,O -Sentence 69,많이,,O -Sentence 69,파세요^^,,O -Sentence 70,배송,,O -Sentence 70,잘오고,,O -Sentence 70,제품,,O -Sentence 70,괜찮은거같아요,,O -Sentence 71,상품은,,O -Sentence 71,좋은데,,O -Sentence 71,배송이,,O -Sentence 71,너무느려요,,O -Sentence 72,좋은데,,O -Sentence 72,너무삐싼,,O -Sentence 72,삼성,,O -Sentence 72,보조배터리,,O -Sentence 73,제조일자,,O -Sentence 73,보고,,O -Sentence 73,깜놀했네요,,O -Sentence 74,나름,,O -Sentence 74,좋게,,O -Sentence 74,구입건승하시길,,O -Sentence 75,삼성정품,,O -Sentence 75,배터리,,O -Sentence 75,비싸요,,O -Sentence 76,2019년,,O -Sentence 76,3월,,O -Sentence 76,제조품입니다,,O -Sentence 77,생각보자는보통입니다,,O -Sentence 78,조아요,,O -Sentence 79,조아요조아요ㅋ,,O -Sentence 80,휴대용,,O -Sentence 80,보조바데리가,,O -Sentence 80,필요해서,,O -Sentence 80,인터넷,,O -Sentence 80,쇼핑하다가,,O -Sentence 80,디자인이,,O -Sentence 80,심플해서,,O -Sentence 80,구매했는데,,O -Sentence 80,받아보니,,O -Sentence 80,"맘에듭니다,",,O -Sentence 80,다만,,O -Sentence 80,충전연결,,O -Sentence 80,선을,,O -Sentence 80,함께,,O -Sentence 80,세트로,,O -Sentence 80,판매하면,,O -Sentence 80,"더좋겠네요,",,O -Sentence 80,별도구매라,,O -Sentence 80,아쉽네요,,O -Sentence 81,가볍고,,O -Sentence 81,디자인괜찮은데,,O -Sentence 81,생각보다,,O -Sentence 81,충전이,,O -Sentence 81,느려요,,O -Sentence 82,크고,,O -Sentence 82,무게감이,,O -Sentence 82,있네요,,O -Sentence 83,충전은,,O -Sentence 83,잘되네요,,O -Sentence 84,충전케이블에,,O -Sentence 84,따라,,O -Sentence 84,충전이,,O -Sentence 84,될때도,,O -Sentence 84,있고,,O -Sentence 84,안될때도,,O -Sentence 84,있고,,O -Sentence 84,귀찮아서,,O -Sentence 84,그냥,,O -Sentence 84,쓰고,,O -Sentence 84,있어요,,O -Sentence 85,충전시간도,,O -Sentence 85,어쩔때는,,O -Sentence 85,하루,,O -Sentence 85,종일,,O -Sentence 85,걸리고,,O -Sentence 85,두께도,,O -Sentence 85,얇고,,O -Sentence 85,모양도,,O -Sentence 85,좋은데,,O -Sentence 85,아무래도,,O -Sentence 85,뽑기를,,O -Sentence 85,잘못한듯ㅠㅠ,,O -Sentence 86,그립감이,,O -Sentence 86,좋고,,O -Sentence 86,충전기는,,O -Sentence 86,다른것과,,O -Sentence 86,차이점은,,O -Sentence 86,모르겠어요,,O -Sentence 87,잘때,,O -Sentence 87,충전안해두고자서,,O -Sentence 87,아침이면항상,,O -Sentence 87,앵꼬라,,O -Sentence 87,충전시키느라,,O -Sentence 87,폰볼일포는데,,O -Sentence 87,좋아요,,O -Sentence 88,배송,,O -Sentence 88,빠르고,,O -Sentence 88,좋아요,,O -Sentence 89,삼성이니,,O -Sentence 89,별세개,,O -Sentence 90,재질이,,O -Sentence 90,좋네요,,O -Sentence 91,더써봐야,,O -Sentence 91,알것같아요,,O -Sentence 92,다음에,,O -Sentence 92,또,,O -Sentence 92,주문하겠습니다,,O -Sentence 93,처음에는,,O -Sentence 93,속도잘나오다,,O -Sentence 93,생각보다,,O -Sentence 93,충전속도가,,O -Sentence 93,빠르진않네요,,O -Sentence 94,조금,,O -Sentence 94,무거운,,O -Sentence 94,느낌이,,O -Sentence 94,주머니가,,O -Sentence 94,부담스러워하네,,O -Sentence 95,디자인은,,O -Sentence 95,예뿐데,,O -Sentence 95,유에스비,,O -Sentence 95,없는것도,,O -Sentence 95,조금,,O -Sentence 95,부담스럽고,,O -Sentence 96,충전속도도,,O -Sentence 96,빠는데,,O -Sentence 96,핸드폰외의,,O -Sentence 96,다른기기에,,O -Sentence 96,충전이,,O -Sentence 96,안돼요,,O -Sentence 97,상품은,,O -Sentence 97,만족합니다만,,O -Sentence 97,배송지연이,,O -Sentence 97,있어서,,O -Sentence 97,좀,,O -Sentence 97,아쉽습니다^^,,O -Sentence 98,좀만,,O -Sentence 98,더,,O -Sentence 98,가벼우면,,O -Sentence 98,좋겠습니다,,O -Sentence 99,디자인이,,O -Sentence 99,예쁩니다,,O -Sentence 100,빠른배송,,O -Sentence 100,감사합니다,,O -Sentence 101,초고속이라더니,,O -Sentence 101,아닌것같네요,,O -Sentence 102,충전속도가,,O -Sentence 102,느린것같네요,,O -Sentence 103,너무,,O -Sentence 103,만족합니다,,O -Sentence 104,삼성정품만,,O -Sentence 104,저는,,O -Sentence 104,사용합니다,,O -Sentence 105,친구꺼빌려써봤다,,O -Sentence 106,좋아서,,O -Sentence 106,구매해봤는데국내판매제품이아닌가스티커가많이붙여져있네요,,O -Sentence 107,배송빨라서,,O -Sentence 107,좋지만,,O -Sentence 107,제품,,O -Sentence 107,표면에,,O -Sentence 107,찐득이,,O -Sentence 107,같은게,,O -Sentence 107,좀,,O -Sentence 107,묻어있어서,,O -Sentence 107,새제품이,,O -Sentence 107,아닌건지,,O -Sentence 107,좀,,O -Sentence 107,찜찜합니다,,O -Sentence 108,완충이,,O -Sentence 108,안돼요,,O -Sentence 109,하루종일,,O -Sentence 109,충전해도,,O -Sentence 109,세칸에서,,O -Sentence 109,넘어가질,,O -Sentence 109,않네요,,O -Sentence 110,2개를,,O -Sentence 110,"구매하였는데,",,O -Sentence 110,하나는,,O -Sentence 110,갑자기,,O -Sentence 110,방전이,,O -Sentence 110,되는,,O -Sentence 110,듯,,O -Sentence 110,해요,,O -Sentence 111,보조배터리는,,O -Sentence 111,정품으로,,O -Sentence 111,사용하시는걸,,O -Sentence 111,권해드려요~,,O -Sentence 112,무식하고,,O -Sentence 112,투박하게,,O -Sentence 112,큽니다,,O -Sentence 113,태블릿,,O -Sentence 113,충전용으로,,O -Sentence 113,삼,,O -Sentence 114,배송은,,O -Sentence 114,빠른데,,O -Sentence 114,초고속충전은,,O -Sentence 114,아니네요,,O -Sentence 115,삼성꺼라,,O -Sentence 115,믿고,,O -Sentence 115,샀어요,,O -Sentence 116,배송이,,O -Sentence 116,빨라요,,O -Sentence 117,충전,,O -Sentence 117,연결,,O -Sentence 117,케이블이,,O -Sentence 117,너무,,O -Sentence 117,짧으무,,O -Sentence 117,ㅜ,,O -Sentence 118,포장,,O -Sentence 118,잘되어,,O -Sentence 118,빠르게,,O -Sentence 118,왔어요,,O -Sentence 119,잘,,O -Sentence 119,맞아요,,O -Sentence 120,깔끔합니다,,O -Sentence 121,그럭저럭쓸만합니다,,O -Sentence 122,it',,O -Sentence 122,s,,O -Sentence 122,okay,,O -Sentence 123,1박2일용,,O -Sentence 123,정도,,O -Sentence 123,용량입니다,,O -Sentence 124,보조배터리에서,,O -Sentence 124,휴대폰,,O -Sentence 124,충전시,,O -Sentence 124,보조배터리의,,O -Sentence 124,잔량표시가,,O -Sentence 124,없어서,,O -Sentence 124,불편함,,O -Sentence 125,바깥에서,,O -Sentence 125,쿵하는,,O -Sentence 125,소리에,,O -Sentence 125,택배인가,,O -Sentence 125,싶어,,O -Sentence 125,나갔는데,,O -Sentence 125,완충재없이,,O -Sentence 125,포장이ㅠㅠ신경쓰시길~~~되는지는,,O -Sentence 125,확인해봐야겠네요ㅜㅜ,,O -Sentence 126,너무,,O -Sentence 126,무거워요,,O -Sentence 126,ㅠㅠ,,O -Sentence 126,쫌ㅜㅜ,,O -Sentence 127,충전시간이나,,O -Sentence 127,사용시간은,,O -Sentence 127,보통,,O -Sentence 127,만족이나,,O -Sentence 127,색상,,O -Sentence 127,선택,,O -Sentence 127,한정은,,O -Sentence 127,매우,,O -Sentence 127,불만족,,O -Sentence 128,좀,,O -Sentence 128,무거워요,,O -Sentence 128,ㅜㅜㅜ,,O diff --git a/ocr_tagging/resources_ocr/data/valid/output.csv b/ocr_tagging/resources_ocr/data/valid/output.csv deleted file mode 100644 index 0317d73..0000000 --- a/ocr_tagging/resources_ocr/data/valid/output.csv +++ /dev/null @@ -1,715 +0,0 @@ -Sentence #,Word,,Aspect -Sentence 1,일단,,O -Sentence 1,배터리는,,O -Sentence 1,잔량없이,,O -Sentence 1,와서,,O -Sentence 1,충전하고,,O -Sentence 1,있어서,,O -Sentence 1,뭐라,,O -Sentence 1,후기쓸게,,O -Sentence 1,없구요,,O -Sentence 2,배송해준,,B-배송 -Sentence 2,롯데택배는,,I-배송 -Sentence 2,정말,,I-배송 -Sentence 2,직업의식이,,I-배송 -Sentence 2,없네요,,I-배송 -Sentence 3,마구,,B-배송 -Sentence 3,찌그러진,,I-배송 -Sentence 3,박스를,,I-배송 -Sentence 3,보는순간,,I-배송 -Sentence 3,배터리에,,I-배송 -Sentence 3,충격이,,I-배송 -Sentence 3,많이,,I-배송 -Sentence 3,갔을것,,I-배송 -Sentence 3,같아,,I-배송 -Sentence 3,걱정이,,I-배송 -Sentence 3,되네요,,I-배송 -Sentence 4,내용물,,B-배송 -Sentence 4,포장도,,I-배송 -Sentence 4,뽁뽁이를,,I-배송 -Sentence 4,한,,I-배송 -Sentence 4,번,,I-배송 -Sentence 4,감아,,I-배송 -Sentence 4,줬으면,,I-배송 -Sentence 4,좋았을텐데,,I-배송 -Sentence 4,그냥,,I-배송 -Sentence 4,배터리포장박스,,I-배송 -Sentence 4,그대로,,I-배송 -Sentence 4,들어있어서,,I-배송 -Sentence 4,충격,,I-배송 -Sentence 4,흡수,,I-배송 -Sentence 4,제대로,,I-배송 -Sentence 4,됐겠습니다,,I-배송 -Sentence 5,판매자님도,,B-배송 -Sentence 5,포장,,I-배송 -Sentence 5,신경써주시고,,I-배송 -Sentence 5,배달하시는,,I-배송 -Sentence 5,분도,,I-배송 -Sentence 5,조심좀,,I-배송 -Sentence 5,해주시면,,I-배송 -Sentence 5,좋겠어요,,I-배송 -Sentence 6,상품,,B-문의 -Sentence 6,상세설명에는,,I-문의 -Sentence 6,10000mAh라고,,I-문의 -Sentence 6,적혀있는데,,I-문의 -Sentence 6,상품,,I-문의 -Sentence 6,박스에는,,I-문의 -Sentence 6,5000mAh라고,,I-문의 -Sentence 6,적혀있어서,,I-문의 -Sentence 6,판매처에,,I-문의 -Sentence 6,문의하니,,I-문의 -Sentence 6,일반충전시에는,,I-문의 -Sentence 6,10000이고,,I-문의 -Sentence 6,고속충전시에는,,I-문의 -Sentence 6,5000이라네요,,I-문의 -Sentence 7,전공하는,,B-충전 -Sentence 7,사람에게,,I-충전 -Sentence 7,물어보니,,I-충전 -Sentence 7,처음,,I-충전 -Sentence 7,듣는,,I-충전 -Sentence 7,이야기라고,,I-충전 -Sentence 7,하는데충전후,,I-충전 -Sentence 7,사용해보니,,I-충전 -Sentence 7,이상은,,I-충전 -Sentence 7,없는것,,I-충전 -Sentence 7,같아,,I-충전 -Sentence 7,그냥,,I-충전 -Sentence 7,사용하기는,,I-충전 -Sentence 7,하지만,,I-충전 -Sentence 7,찜찜한,,I-충전 -Sentence 7,마음은,,I-충전 -Sentence 7,사라지지,,I-충전 -Sentence 7,않아요,,I-충전 -Sentence 8,1,,B-무게 -Sentence 8,아주,,I-무게 -Sentence 8,무겁습니다,,I-무게 -Sentence 9,처음에는,,B-무게 -Sentence 9,괜찮았는데,,I-무게 -Sentence 9,2시간,,I-무게 -Sentence 9,넘어가니들고,,I-무게 -Sentence 9,다녔더니,,I-무게 -Sentence 9,허리가,,I-무게 -Sentence 9,부러질,,I-무게 -Sentence 9,것,,I-무게 -Sentence 9,같습니다,,I-무게 -Sentence 9,(주말에,,I-무게 -Sentence 9,친구들이랑,,I-무게 -Sentence 9,놀다가,,I-무게 -Sentence 9,결국,,I-무게 -Sentence 9,가벼운거,,I-무게 -Sentence 9,새거삼),,I-무게 -Sentence 10,2,,B-배터리용량 -Sentence 10,그,,I-배터리용량 -Sentence 10,외,,I-배터리용량 -Sentence 10,용량이,,I-배터리용량 -Sentence 10,커서,,I-배터리용량 -Sentence 10,여러번,,I-배터리용량 -Sentence 10,충전되는,,I-배터리용량 -Sentence 10,점과,,I-배터리용량 -Sentence 10,"디자인,",,B-디자인 -Sentence 10,색상,,I-디자인 -Sentence 10,등은,,I-디자인 -Sentence 10,마음에,,I-디자인 -Sentence 10,듭니다,,I-디자인 -Sentence 10,3,,B-휴대성 -Sentence 10,휴대용,,I-휴대성 -Sentence 10,여행용으로는,,I-휴대성 -Sentence 10,가벼운,,I-휴대성 -Sentence 10,저용량,,I-휴대성 -Sentence 10,제품이맞고,,I-휴대성 -Sentence 10,이제품은,,I-휴대성 -Sentence 10,이동수단을,,I-휴대성 -Sentence 10,타고,,I-휴대성 -Sentence 10,이동할때,,I-휴대성 -Sentence 10,충전용으로,,I-휴대성 -Sentence 10,알맞은것같습니다,,I-휴대성 -Sentence 11,잠깐,,O -Sentence 11,사용해봤는데,,O -Sentence 11,충전,,O -Sentence 11,빠릅니다,,O -Sentence 12,별점,,B-배송 -Sentence 12,뺀,,I-배송 -Sentence 12,이유는,,I-배송 -Sentence 12,완충제,,I-배송 -Sentence 12,없이,,I-배송 -Sentence 12,보내주셔서,,I-배송 -Sentence 12,오면서,,I-배송 -Sentence 12,상품에,,I-배송 -Sentence 12,충격이,,I-배송 -Sentence 12,많았을것,,I-배송 -Sentence 12,같습니다,,I-배송 -Sentence 13,"또,",,B-기타 -Sentence 13,배터리,,I-기타 -Sentence 13,외관에도,,I-기타 -Sentence 13,완전히,,I-기타 -Sentence 13,새거같지않은,,I-기타 -Sentence 13,느낌이,,I-기타 -Sentence 13,있습니다(미세한,,I-기타 -Sentence 13,긁힘,,I-기타 -Sentence 13,등),,I-기타 -Sentence 14,급히,,O -Sentence 14,필요하기도,,O -Sentence 14,하고,,O -Sentence 14,본연의,,O -Sentence 14,기능에는,,O -Sentence 14,문제가,,O -Sentence 14,없어,,O -Sentence 14,그냥,,O -Sentence 14,쓰려고하지만,,O -Sentence 14,아쉽습니다,,O -Sentence 15,참고하세요~,,O -Sentence 16,가격,,O -Sentence 16,다른,,O -Sentence 16,곳에,,O -Sentence 16,비해,,O -Sentence 16,저렴한,,O -Sentence 16,편인것,,O -Sentence 16,같아요,,O -Sentence 17,배송도,,B-배송 -Sentence 17,빠르고,,I-배송 -Sentence 17,안전하게,,I-배송 -Sentence 17,왔어요,,I-배송 -Sentence 18,충전은,,B-충전 -Sentence 18,확실하게,,I-충전 -Sentence 18,안정적으로,,I-충전 -Sentence 18,잘,,I-충전 -Sentence 18,됩니다,,I-충전 -Sentence 19,무게도,,B-무게 -Sentence 19,가볍습니다,,I-무게 -Sentence 20,겉표면에,,B-기타 -Sentence 20,더럽고,,I-기타 -Sentence 20,끈적거리는,,I-기타 -Sentence 20,테이프스티커,,I-기타 -Sentence 20,같은게,,I-기타 -Sentence 20,왜,,I-기타 -Sentence 20,붙어있는지,,I-기타 -Sentence 20,새제품,,I-기타 -Sentence 20,맞는지,,I-기타 -Sentence 20,의심스럽네,,I-기타 -Sentence 21,으레,,O -Sentence 21,보배가,,O -Sentence 21,그렇듯이(),,O -Sentence 21,선이,,O -Sentence 21,자꾸,,O -Sentence 21,빠져여,,O -Sentence 21,그리고,,O -Sentence 21,보통,,O -Sentence 21,폰이랑,,O -Sentence 21,보배랑,,O -Sentence 21,가만히,,O -Sentence 21,안두고,,O -Sentence 21,손으로,,O -Sentence 21,들기만하눈,,O -Sentence 22,데,,O -Sentence 22,됏다가안됏다가,,O -Sentence 22,성능,,B-배터리용량 -Sentence 22,자체는,,I-배터리용량 -Sentence 22,조아욤,,I-배터리용량 -Sentence 22,풀충,,I-배터리용량 -Sentence 22,두번정도,,I-배터리용량 -Sentence 22,할수잇고,,I-배터리용량 -Sentence 22,고속충전이라,,B-고속충전 -Sentence 22,충전두빠르구여,,I-고속충전 -Sentence 22,다만,,B-케이블 -Sentence 22,케이블선이좀아쉽네여,,I-케이블 -Sentence 23,이거,,O -Sentence 23,좀,,O -Sentence 23,이상합니다,,O -Sentence 24,제가,,O -Sentence 24,불량이,,O -Sentence 24,걸린건지,,O -Sentence 24,모르겠는데,,O -Sentence 24,분명,,O -Sentence 24,어제까지만해도,,O -Sentence 24,불3개가,,O -Sentence 24,들어와있었는데,,O -Sentence 24,오늘,,O -Sentence 24,사용하려고,,O -Sentence 24,핸드폰을,,O -Sentence 24,꽂았는데,,O -Sentence 24,배터리가,,O -Sentence 24,없답니다,,O -Sentence 25,그리고,,O -Sentence 25,배터리,,O -Sentence 25,자체도,,O -Sentence 25,충전이,,O -Sentence 25,엄청,,O -Sentence 25,느려요,,O -Sentence 26,왜그러는지,,O -Sentence 26,모르겠어요,,O -Sentence 27,고객센터,,O -Sentence 27,가기도,,O -Sentence 27,귀찮은데,,O -Sentence 27,",,,,",,O -Sentence 27,아오,,O -Sentence 28,제품,,B-기타 -Sentence 28,자체는,,I-기타 -Sentence 28,마음에,,I-기타 -Sentence 28,"드는데,",,I-기타 -Sentence 28,제조일자가,,I-기타 -Sentence 28,2019년이라서,,I-기타 -Sentence 28,2점,,I-기타 -Sentence 28,뺐습니다,,I-기타 -Sentence 29,좋은데,,B-그립감 -Sentence 29,미끄러워서,,I-그립감 -Sentence 29,사용하기,,I-그립감 -Sentence 29,불편해요,,I-그립감 -Sentence 29,ㅠㅠ,,I-그립감 -Sentence 30,제조연월일이,,B-기타 -Sentence 30,좀,,I-기타 -Sentence 30,오래,,I-기타 -Sentence 30,되어서,,I-기타 -Sentence 30,아쉬워요,,I-기타 -Sentence 31,무게감은,,B-무게 -Sentence 31,딱좋고,,I-무게 -Sentence 31,제폰에,,B-충전 -Sentence 31,충전하면,,I-충전 -Sentence 31,어쩌다한번,,I-충전 -Sentence 31,충전안되서,,I-충전 -Sentence 31,한두번,,I-충전 -Sentence 31,다시,,I-충전 -Sentence 31,꽂아줘야돼요,,I-충전 -Sentence 32,상품은,,O -Sentence 32,그냥,,O -Sentence 32,그래요,,O -Sentence 33,배송도,,B-배송 -Sentence 33,빠르고,,I-배송 -Sentence 33,작동도,,I-배송 -Sentence 33,잘되서,,I-배송 -Sentence 33,좋은데,,I-배송 -Sentence 33,가격이,,B-기타 -Sentence 33,저렴할때,,I-기타 -Sentence 33,예상했지만,,I-기타 -Sentence 33,제조일이,,I-기타 -Sentence 33,좀,,I-기타 -Sentence 33,지나서,,I-기타 -Sentence 33,배터리가,,I-기타 -Sentence 33,완전,,I-기타 -Sentence 33,방전나있던게,,I-기타 -Sentence 33,아쉽네요,,I-기타 -Sentence 34,휴대성좋고,,B-휴대성 -Sentence 34,깔끔해요,,B-디자인 -Sentence 35,연결을,,O -Sentence 35,안해도,,O -Sentence 35,배터리가,,O -Sentence 35,원래,,O -Sentence 35,빨리,,O -Sentence 35,줄어드나요,,O -Sentence 36,생각보다,,O -Sentence 36,빨리,,O -Sentence 36,없어져서,,O -Sentence 36,당황,,O -Sentence 36,센터가봐야하나,,O -Sentence 36,고민중입니다,,O -Sentence 37,디자인은,,B-디자인 -Sentence 37,별로충전은,,I-디자인 -Sentence 37,빠릅니다,,I-충전 -Sentence 38,휴대성이조아요,,B-휴대성 -Sentence 39,굿굿,,O -Sentence 40,깔끔한,,O -Sentence 40,상품감사합니다,,O -Sentence 41,택배박스안에,,B-배송 -Sentence 41,배터리팩만,,I-배송 -Sentence 41,있어서,,I-배송 -Sentence 41,아쉬워요,,I-배송 -Sentence 42,제품은,,O -Sentence 42,맘에,,O -Sentence 42,드나,,O -Sentence 42,포장이,,O -Sentence 42,뾱뾱이같은거,,O -Sentence 42,한장도,,O -Sentence 42,없이,,O -Sentence 42,그냥,,O -Sentence 42,담겨옴,,O -Sentence 43,충격에,,O -Sentence 43,예민한,,O -Sentence 43,배터린데,,O -Sentence 43,너무하네요,,O -Sentence 44,충전줄도,,B-케이블 -Sentence 44,짧아서,,I-케이블 -Sentence 44,좀,,I-케이블 -Sentence 44,아쉬워요,,I-케이블 -Sentence 45,포장이,,O -Sentence 45,아쉬웠어요,,O -Sentence 46,완충제하나없이,,O -Sentence 46,달랑,,O -Sentence 46,배터리만,,O -Sentence 46,있어서,,O -Sentence 46,받을때,,O -Sentence 46,"별로였네요,",,O -Sentence 46,다행이,,O -Sentence 46,아직까진,,O -Sentence 46,기능에,,O -Sentence 46,문제는,,O -Sentence 46,없지만요,,O -Sentence 47,배낭,,B-무게 -Sentence 47,여행을,,I-무게 -Sentence 47,위해,,I-무게 -Sentence 47,새,,I-무게 -Sentence 47,것,,I-무게 -Sentence 47,선택하였는데,,I-무게 -Sentence 47,생각했던,,I-무게 -Sentence 47,것,,I-무게 -Sentence 47,보다,,I-무게 -Sentence 47,좀,,I-무게 -Sentence 47,무겁네요,,I-무게 -Sentence 48,잘,,O -Sentence 48,쓰고,,O -Sentence 48,있습니다,,O -Sentence 49,고속,,O -Sentence 49,충전보조배터리,,O -Sentence 49,좋은,,O -Sentence 49,가격에,,O -Sentence 49,잘,,O -Sentence 49,구매했어요,,O -Sentence 50,새기계는,,O -Sentence 50,충전이,,O -Sentence 50,원래,,O -Sentence 50,안,,O -Sentence 50,되있나요,,O -Sentence 51,고장난,,O -Sentence 51,건,,O -Sentence 51,줄ㅎ,,O -Sentence 51,괜찮아요,,O -Sentence 52,배송은,,B-배송 -Sentence 52,빠르나,,I-배송 -Sentence 52,제품,,B-AS -Sentence 52,충전이,,I-AS -Sentence 52,안되서,,I-AS -Sentence 52,받자마자,,I-AS -Sentence 52,As갔다왔네요,,I-AS -Sentence 53,배송,,B-배송 -Sentence 53,빠르고,,I-배송 -Sentence 53,디자인도,,B-디자인 -Sentence 53,무난한,,I-디자인 -Sentence 53,것,,I-디자인 -Sentence 53,같습니다,,I-디자인 -Sentence 54,그럭저럭,,B-기타 -Sentence 54,괜찮은,,I-기타 -Sentence 54,듯,,I-기타 -Sentence 54,한데,,I-기타 -Sentence 54,가격이,,I-기타 -Sentence 54,비싼,,I-기타 -Sentence 54,듯,,I-기타 -Sentence 55,충전,,O -Sentence 55,문제없이,,O -Sentence 55,잘되서,,O -Sentence 55,만족해요,,O -Sentence 56,생각보다,,O -Sentence 56,많이,,O -Sentence 56,괜찮아요,,O -Sentence 57,삼성,,O -Sentence 57,휴대폰에서,,O -Sentence 57,최적화된,,O -Sentence 57,거,,O -Sentence 57,같습니다,,O -Sentence 58,그런데,,O -Sentence 58,생각보다,,O -Sentence 58,충전하는데,,O -Sentence 58,시간이,,O -Sentence 58,좀,,O -Sentence 58,걸리는,,O -Sentence 58,거,,O -Sentence 58,같습니다;;;,,O -Sentence 59,무조건,,B-배송 -Sentence 59,5점,,I-배송 -Sentence 59,주는데,,I-배송 -Sentence 59,뾱뾱이도없고,,I-배송 -Sentence 59,에어쿠션도,,I-배송 -Sentence 59,없어서,,I-배송 -Sentence 59,3점드려요,,I-배송 -Sentence 60,이렇게,,O -Sentence 60,파시면,,O -Sentence 60,몇백원,,O -Sentence 60,아끼려다,,O -Sentence 60,몇십만원,,O -Sentence 60,손해봅니다,,O -Sentence 61,사장님,,O -Sentence 62,뽁뽁이,,O -Sentence 62,같은거,,O -Sentence 62,없이,,O -Sentence 62,박스에,,O -Sentence 62,상품만,,O -Sentence 62,왔어요,,O -Sentence 63,전자제품인데,,O -Sentence 63,택배기사님은,,O -Sentence 63,문앞에,,O -Sentence 63,물건,,O -Sentence 63,던져버리구요,,O -Sentence 64,상자에,,O -Sentence 64,배터리본품만,,O -Sentence 64,달랑,,O -Sentence 64,들어와있어요,,O -Sentence 65,충진재도없이,,O -Sentence 66,아이폰용으로,,B-호환성 -Sentence 66,구입했는데,,I-호환성 -Sentence 66,usb용이,,I-호환성 -Sentence 66,없네요ㅜㅜ,,I-호환성 -Sentence 67,제품은,,O -Sentence 67,브랜드라,,O -Sentence 67,좋으나,,O -Sentence 67,포장이,,O -Sentence 67,꽝입니다,,O -Sentence 68,제품포장도,,O -Sentence 68,정말,,O -Sentence 68,얇은데,,O -Sentence 68,뽁뽁이,,O -Sentence 68,한장,,O -Sentence 68,없이,,O -Sentence 68,덜렁,,O -Sentence 68,들어있더군요,,O -Sentence 69,걍,,O -Sentence 69,심플해요,,O -Sentence 69,케이블도주고,,O -Sentence 69,많이,,O -Sentence 69,파세요^^,,O -Sentence 70,배송,,O -Sentence 70,잘오고,,O -Sentence 70,제품,,O -Sentence 70,괜찮은거같아요,,O -Sentence 71,상품은,,O -Sentence 71,좋은데,,O -Sentence 71,배송이,,O -Sentence 71,너무느려요,,O -Sentence 72,좋은데,,O -Sentence 72,너무삐싼,,O -Sentence 72,삼성,,O -Sentence 72,보조배터리,,O -Sentence 73,제조일자,,O -Sentence 73,보고,,O -Sentence 73,깜놀했네요,,O -Sentence 74,나름,,O -Sentence 74,좋게,,O -Sentence 74,구입건승하시길,,O -Sentence 75,삼성정품,,O -Sentence 75,배터리,,O -Sentence 75,비싸요,,O -Sentence 76,2019년,,O -Sentence 76,3월,,O -Sentence 76,제조품입니다,,O -Sentence 77,생각보자는보통입니다,,O -Sentence 78,조아요,,O -Sentence 79,조아요조아요ㅋ,,O -Sentence 80,휴대용,,O -Sentence 80,보조바데리가,,O -Sentence 80,필요해서,,O -Sentence 80,인터넷,,O -Sentence 80,쇼핑하다가,,O -Sentence 80,디자인이,,O -Sentence 80,심플해서,,O -Sentence 80,구매했는데,,O -Sentence 80,받아보니,,O -Sentence 80,"맘에듭니다,",,O -Sentence 80,다만,,O -Sentence 80,충전연결,,O -Sentence 80,선을,,O -Sentence 80,함께,,O -Sentence 80,세트로,,O -Sentence 80,판매하면,,O -Sentence 80,"더좋겠네요,",,O -Sentence 80,별도구매라,,O -Sentence 80,아쉽네요,,O -Sentence 81,가볍고,,O -Sentence 81,디자인괜찮은데,,O -Sentence 81,생각보다,,O -Sentence 81,충전이,,O -Sentence 81,느려요,,O -Sentence 82,크고,,O -Sentence 82,무게감이,,O -Sentence 82,있네요,,O -Sentence 83,충전은,,O -Sentence 83,잘되네요,,O -Sentence 84,충전케이블에,,O -Sentence 84,따라,,O -Sentence 84,충전이,,O -Sentence 84,될때도,,O -Sentence 84,있고,,O -Sentence 84,안될때도,,O -Sentence 84,있고,,O -Sentence 84,귀찮아서,,O -Sentence 84,그냥,,O -Sentence 84,쓰고,,O -Sentence 84,있어요,,O -Sentence 85,충전시간도,,O -Sentence 85,어쩔때는,,O -Sentence 85,하루,,O -Sentence 85,종일,,O -Sentence 85,걸리고,,O -Sentence 85,두께도,,O -Sentence 85,얇고,,O -Sentence 85,모양도,,O -Sentence 85,좋은데,,O -Sentence 85,아무래도,,O -Sentence 85,뽑기를,,O -Sentence 85,잘못한듯ㅠㅠ,,O -Sentence 86,그립감이,,O -Sentence 86,좋고,,O -Sentence 86,충전기는,,O -Sentence 86,다른것과,,O -Sentence 86,차이점은,,O -Sentence 86,모르겠어요,,O -Sentence 87,잘때,,O -Sentence 87,충전안해두고자서,,O -Sentence 87,아침이면항상,,O -Sentence 87,앵꼬라,,O -Sentence 87,충전시키느라,,O -Sentence 87,폰볼일포는데,,O -Sentence 87,좋아요,,O -Sentence 88,배송,,O -Sentence 88,빠르고,,O -Sentence 88,좋아요,,O -Sentence 89,삼성이니,,O -Sentence 89,별세개,,O -Sentence 90,재질이,,O -Sentence 90,좋네요,,O -Sentence 91,더써봐야,,O -Sentence 91,알것같아요,,O -Sentence 92,다음에,,O -Sentence 92,또,,O -Sentence 92,주문하겠습니다,,O -Sentence 93,처음에는,,O -Sentence 93,속도잘나오다,,O -Sentence 93,생각보다,,O -Sentence 93,충전속도가,,O -Sentence 93,빠르진않네요,,O -Sentence 94,조금,,O -Sentence 94,무거운,,O -Sentence 94,느낌이,,O -Sentence 94,주머니가,,O -Sentence 94,부담스러워하네,,O -Sentence 95,디자인은,,O -Sentence 95,예뿐데,,O -Sentence 95,유에스비,,O -Sentence 95,없는것도,,O -Sentence 95,조금,,O -Sentence 95,부담스럽고,,O -Sentence 96,충전속도도,,O -Sentence 96,빠는데,,O -Sentence 96,핸드폰외의,,O -Sentence 96,다른기기에,,O -Sentence 96,충전이,,O -Sentence 96,안돼요,,O -Sentence 97,상품은,,O -Sentence 97,만족합니다만,,O -Sentence 97,배송지연이,,O -Sentence 97,있어서,,O -Sentence 97,좀,,O -Sentence 97,아쉽습니다^^,,O -Sentence 98,좀만,,O -Sentence 98,더,,O -Sentence 98,가벼우면,,O -Sentence 98,좋겠습니다,,O -Sentence 99,디자인이,,O -Sentence 99,예쁩니다,,O -Sentence 100,빠른배송,,O -Sentence 100,감사합니다,,O -Sentence 101,초고속이라더니,,O -Sentence 101,아닌것같네요,,O -Sentence 102,충전속도가,,O -Sentence 102,느린것같네요,,O -Sentence 103,너무,,O -Sentence 103,만족합니다,,O -Sentence 104,삼성정품만,,O -Sentence 104,저는,,O -Sentence 104,사용합니다,,O -Sentence 105,친구꺼빌려써봤다,,O -Sentence 106,좋아서,,O -Sentence 106,구매해봤는데국내판매제품이아닌가스티커가많이붙여져있네요,,O -Sentence 107,배송빨라서,,O -Sentence 107,좋지만,,O -Sentence 107,제품,,O -Sentence 107,표면에,,O -Sentence 107,찐득이,,O -Sentence 107,같은게,,O -Sentence 107,좀,,O -Sentence 107,묻어있어서,,O -Sentence 107,새제품이,,O -Sentence 107,아닌건지,,O -Sentence 107,좀,,O -Sentence 107,찜찜합니다,,O -Sentence 108,완충이,,O -Sentence 108,안돼요,,O -Sentence 109,하루종일,,O -Sentence 109,충전해도,,O -Sentence 109,세칸에서,,O -Sentence 109,넘어가질,,O -Sentence 109,않네요,,O -Sentence 110,2개를,,O -Sentence 110,"구매하였는데,",,O -Sentence 110,하나는,,O -Sentence 110,갑자기,,O -Sentence 110,방전이,,O -Sentence 110,되는,,O -Sentence 110,듯,,O -Sentence 110,해요,,O -Sentence 111,보조배터리는,,O -Sentence 111,정품으로,,O -Sentence 111,사용하시는걸,,O -Sentence 111,권해드려요~,,O -Sentence 112,무식하고,,O -Sentence 112,투박하게,,O -Sentence 112,큽니다,,O -Sentence 113,태블릿,,O -Sentence 113,충전용으로,,O -Sentence 113,삼,,O -Sentence 114,배송은,,O -Sentence 114,빠른데,,O -Sentence 114,초고속충전은,,O -Sentence 114,아니네요,,O -Sentence 115,삼성꺼라,,O -Sentence 115,믿고,,O -Sentence 115,샀어요,,O -Sentence 116,배송이,,O -Sentence 116,빨라요,,O -Sentence 117,충전,,O -Sentence 117,연결,,O -Sentence 117,케이블이,,O -Sentence 117,너무,,O -Sentence 117,짧으무,,O -Sentence 117,ㅜ,,O -Sentence 118,포장,,O -Sentence 118,잘되어,,O -Sentence 118,빠르게,,O -Sentence 118,왔어요,,O -Sentence 119,잘,,O -Sentence 119,맞아요,,O -Sentence 120,깔끔합니다,,O -Sentence 121,그럭저럭쓸만합니다,,O -Sentence 122,it',,O -Sentence 122,s,,O -Sentence 122,okay,,O -Sentence 123,1박2일용,,O -Sentence 123,정도,,O -Sentence 123,용량입니다,,O -Sentence 124,보조배터리에서,,O -Sentence 124,휴대폰,,O -Sentence 124,충전시,,O -Sentence 124,보조배터리의,,O -Sentence 124,잔량표시가,,O -Sentence 124,없어서,,O -Sentence 124,불편함,,O -Sentence 125,바깥에서,,O -Sentence 125,쿵하는,,O -Sentence 125,소리에,,O -Sentence 125,택배인가,,O -Sentence 125,싶어,,O -Sentence 125,나갔는데,,O -Sentence 125,완충재없이,,O -Sentence 125,포장이ㅠㅠ신경쓰시길~~~되는지는,,O -Sentence 125,확인해봐야겠네요ㅜㅜ,,O -Sentence 126,너무,,O -Sentence 126,무거워요,,O -Sentence 126,ㅠㅠ,,O -Sentence 126,쫌ㅜㅜ,,O -Sentence 127,충전시간이나,,O -Sentence 127,사용시간은,,O -Sentence 127,보통,,O -Sentence 127,만족이나,,O -Sentence 127,색상,,O -Sentence 127,선택,,O -Sentence 127,한정은,,O -Sentence 127,매우,,O -Sentence 127,불만족,,O -Sentence 128,좀,,O -Sentence 128,무거워요,,O -Sentence 128,ㅜㅜㅜ,,O diff --git a/ocr_tagging/scripts_ocr/data/data_parsing.sh b/ocr_tagging/scripts_ocr/data/data_parsing.sh index 229901d..a0f48b2 100644 --- a/ocr_tagging/scripts_ocr/data/data_parsing.sh +++ b/ocr_tagging/scripts_ocr/data/data_parsing.sh @@ -1,10 +1,10 @@ #!/bin/bash # 경로에 띄어쓰기가 있다면 공백을 제거할 것! -fp="./resources_review/data/" -save_p="./resources_review/parsing_data/" +fp="./resources_ocr/data/" +save_p="./resources_ocr/parsing_data/" val_ratio=0.1 test_ratio=0.1 encoding=utf-8-sig -python ./src_review/do_parsingData.py --fp=$fp --save_p=$save_p --val_ratio=$val_ratio --test_ratio=$test_ratio --encoding=$encoding \ No newline at end of file +python ./src_ocr/do_parsingData.py --fp=$fp --save_p=$save_p --val_ratio=$val_ratio --test_ratio=$test_ratio --encoding=$encoding \ No newline at end of file diff --git a/ocr_tagging/scripts_ocr/model/do_eval.sh b/ocr_tagging/scripts_ocr/model/do_eval.sh index 08c9aa9..e66fd26 100644 --- a/ocr_tagging/scripts_ocr/model/do_eval.sh +++ b/ocr_tagging/scripts_ocr/model/do_eval.sh @@ -1,6 +1,6 @@ #!/bin/bash -eval_fp="./resources_ocr/data/test/" +eval_fp="./resources_ocr/parsing_data/test/" eval_batch_size=4 init_model_path=klue/bert-base max_length=512 diff --git a/ocr_tagging/scripts_ocr/model/do_train.sh b/ocr_tagging/scripts_ocr/model/do_train.sh index f873469..820fd49 100644 --- a/ocr_tagging/scripts_ocr/model/do_train.sh +++ b/ocr_tagging/scripts_ocr/model/do_train.sh @@ -5,12 +5,12 @@ train_batch_size=4 valid_batch_size=4 init_model_path=klue/bert-base max_length=512 -need_birnn=false +need_birnn=0 aspect_drop_ratio=0.3 aspect_in_feature=768 stop_patience=3 -train_fp="./resources_ocr/data/train/" -valid_fp="./resources_ocr/data/valid/" +train_fp="./resources_ocr/parsing_data/train/" +valid_fp="./resources_ocr/parsing_data/valid/" base_path="./ckpt_ocr/model/" label_info_file="meta.bin" out_model_path="pytorch_model.bin" diff --git a/ocr_tagging/src_ocr/data_manager/dataset/absa.py b/ocr_tagging/src_ocr/data_manager/dataset/absa.py index 975b474..8ea5dd0 100644 --- a/ocr_tagging/src_ocr/data_manager/dataset/absa.py +++ b/ocr_tagging/src_ocr/data_manager/dataset/absa.py @@ -101,9 +101,21 @@ def __iter__(self): aspects = df.groupby("Sentence #")["Aspect"].apply(list).values aspects2 = df.groupby("Sentence #")["Aspect2"].apply(list).values + # for i in range(len(sentences)): + # self.s_len += 1 + # yield self.parsing_data(sentences[i], aspects[i], aspects2[i]) for i in range(len(sentences)): self.s_len += 1 - yield self.parsing_data(sentences[i], aspects[i], aspects2[i]) + if(i % 2 == 0): + yield self.parsing_data(sentences[i], sentences[i+1], aspects[i], aspects[i+1], aspects2[i], aspects2[i+1]) + elif(i & 2 == 0 and i == len(sentences)-1): + yield self.parsing_data(sentences[i], sentences[i], aspects[i], aspects[i], aspects2[i], aspects2[i]) + # if(i == len(sentences)-1): + # self.s_len += 2 + # yield self.parsing_data(sentences[i], sentences[i], aspects[i], aspects[i], aspects2[i], aspects2[i]) + # else: + # self.s_len += 2 + # yield self.parsing_data(sentences[i], sentences[i+1], aspects[i], aspects[i+1], aspects2[i], aspects2[i+1]) def __len__(self): if self.data_len == 0: @@ -122,17 +134,25 @@ def get_length(self): self.data_len = math.ceil(self.data_len / self.batch_size) return self.data_len - def parsing_data(self, text, aspect, aspect2): + def parsing_data(self, text_1, text_2, aspect_1, aspect_2, aspect2_1, aspect2_2): ids = [] target_aspect = [] # target Aspect Category tensor ids 저장 리스트 target_aspect2 = [] # target 대분류 Aspect Category tensor ids 저장 리스트 (대분류 기준 성능 측정을 위함) - for i, s in enumerate(text): - inputs = self.tokenizer.encode(s, add_special_tokens=False) - input_len = len(inputs) - ids.extend(inputs) - target_aspect.extend([aspect[i]] * input_len) - target_aspect2.extend([aspect2[i]] * input_len) + # for i, s in enumerate(text): + # inputs = self.tokenizer.encode(s, add_special_tokens=False) + # input_len = len(inputs) + # ids.extend(inputs) + # target_aspect.extend([aspect[i]] * input_len) + # target_aspect2.extend([aspect2[i]] * input_len) + for i, (s1, s2) in enumerate(zip(text_1, text_2)): + inputs_1 = self.tokenizer.encode(s1, add_special_tokens=False) + inputs_2 = self.tokenizer.encode(s2, add_special_tokens=False) + inputs_1_len = len(inputs_1) + inputs_2_len = len(inputs_2) + ids.extend(inputs_1 + self.SEP_IDS + inputs_2) + target_aspect.extend([aspect_1[i]] * inputs_1_len + self.PADDING_TAG_IDS + [aspect_2[i]] * inputs_2_len) + target_aspect2.extend([aspect2_1[i]] * inputs_1_len + self.PADDING_TAG_IDS + [aspect2_2[i]] * inputs_2_len) # BERT가 처리할 수 있는 길이 (max_length)에 맞추어 slicing ids = ids[:self.max_len - 2] diff --git "a/ocr_tagging/src_ocr/data_manager/dataset/absa_\354\210\230\354\240\225.py" "b/ocr_tagging/src_ocr/data_manager/dataset/absa_\354\210\230\354\240\225.py" new file mode 100644 index 0000000..8ea5dd0 --- /dev/null +++ "b/ocr_tagging/src_ocr/data_manager/dataset/absa_\354\210\230\354\240\225.py" @@ -0,0 +1,184 @@ +from pathlib import Path +import os +import joblib +from sklearn import preprocessing +from sklearn.utils import column_or_1d +from collections import OrderedDict +import pandas as pd +import transformers +import torch +from torch.utils.data import IterableDataset +from utils.file_io import get_file_list, read_csv +from data_manager.parsers.label_unification.label_map import label_list, label_changing_rule +import math + + +# Encoder - Sentiment 및 Aspect 속성을 Encoding할 Label Encoder class +class MyLabelEncoder(preprocessing.LabelEncoder): + def fit(self, y): + y = column_or_1d(y, warn=True) + self.classes_ = pd.Series(y).unique() + return self + + +class Encoder: + def __init__(self, config, fp=None, extension="csv"): + self.fp = fp + self.file_list = get_file_list(fp, extension) + self.meta_data_fp = os.path.join(config.base_path + config.label_info_file) + + # Sentiment 속성을 위한 Encoder 및 Aspect Category를 위한 Encoder + self.enc_aspect, self.enc_aspect2 = None, None + self.aspect_labels, self.aspect2_labels = ['PAD', 'O'], ['PAD', 'O'] + + # 저장된 Encoder 유무를 확인 + # 있으면 load, 없으면 생성 + def check_encoder_fp(self): + meta_data_f = Path(self.meta_data_fp) + if meta_data_f.exists(): + meta_data = joblib.load(self.meta_data_fp) + self.enc_aspect, self.enc_aspect2 = meta_data["enc_aspect"], meta_data["enc_aspect2"] + else: + meta_data = self.set_encoder() + joblib.dump(meta_data, self.meta_data_fp) + + def get_encoder(self): + if self.enc_aspect is None or self.enc_aspect2 is None: + self.check_encoder_fp() + return self.enc_aspect, self.enc_aspect2 + + def set_encoder(self): + if len(self.file_list) == 0: + print(f"파일 경로 {self.fp}에 Encoding할 데이터가 존재하지 않습니다.") + raise FileExistsError() + + for now_fp in self.file_list: + df = read_csv(now_fp) + self.aspect_labels.extend(list(df["Aspect"].unique())) + + # Encoder 선언 및 fitting + self.enc_aspect, self.enc_aspect2 = MyLabelEncoder(), MyLabelEncoder() + self.aspect_labels = list(OrderedDict.fromkeys(self.aspect_labels)) + self.aspect2_labels.extend([label for label in label_list]) + + self.enc_aspect = self.enc_aspect.fit(self.aspect_labels) + self.enc_aspect2 = self.enc_aspect2.fit(self.aspect2_labels) + return {"enc_aspect": self.enc_aspect, "enc_aspect2": self.enc_aspect2} + + +class ABSADataset(IterableDataset): + def __init__(self, config, fp, enc_aspect, enc_aspect2, batch_size, data_len=0, extension="csv"): + self.data_len = data_len + self.file_list = get_file_list(fp, extension) + self.batch_size = batch_size + self.max_len = config.max_length + self.config = config + + # Encoder Setting + self.enc_aspect = enc_aspect + self.enc_aspect2 = enc_aspect2 + + # for embedding + self.tokenizer = transformers.BertTokenizer.from_pretrained(config.init_model_path, do_lower_case=False) + self.CLS_IDS = self.tokenizer.encode('[CLS]', add_special_tokens=False) # [2] + self.PAD_IDS = self.tokenizer.encode('[PAD]', add_special_tokens=False) # [0] + self.SEP_IDS = self.tokenizer.encode('[SEP]', add_special_tokens=False) # [3] + self.PADDING_TAG_IDS = [0] + self.s_len = 0 + + def __iter__(self): + # read data + for now_fp in self.file_list: + df = read_csv(now_fp) + df.loc[:, "Sentence #"] = df["Sentence #"].fillna(method="ffill") + df["Aspect2"] = df["Aspect"] + df = df.replace({"Aspect2": label_changing_rule}) + + df.loc[:, "Aspect"] = self.enc_aspect.transform(df[["Aspect"]]) + df.loc[:, "Aspect2"] = self.enc_aspect2.transform(df[["Aspect2"]]) + + sentences = df.groupby("Sentence #")["Word"].apply(list).values + aspects = df.groupby("Sentence #")["Aspect"].apply(list).values + aspects2 = df.groupby("Sentence #")["Aspect2"].apply(list).values + + # for i in range(len(sentences)): + # self.s_len += 1 + # yield self.parsing_data(sentences[i], aspects[i], aspects2[i]) + for i in range(len(sentences)): + self.s_len += 1 + if(i % 2 == 0): + yield self.parsing_data(sentences[i], sentences[i+1], aspects[i], aspects[i+1], aspects2[i], aspects2[i+1]) + elif(i & 2 == 0 and i == len(sentences)-1): + yield self.parsing_data(sentences[i], sentences[i], aspects[i], aspects[i], aspects2[i], aspects2[i]) + # if(i == len(sentences)-1): + # self.s_len += 2 + # yield self.parsing_data(sentences[i], sentences[i], aspects[i], aspects[i], aspects2[i], aspects2[i]) + # else: + # self.s_len += 2 + # yield self.parsing_data(sentences[i], sentences[i+1], aspects[i], aspects[i+1], aspects2[i], aspects2[i+1]) + + def __len__(self): + if self.data_len == 0: + self.data_len = self.get_length() + return self.data_len + + # data length를 계산 + def get_length(self): + if self.data_len > 0: + return self.data_len + else: + for now_fp in self.file_list: + df = read_csv(now_fp) + sentences = df.groupby("Sentence #")["Word"].apply(list).values + self.data_len += len(sentences) + self.data_len = math.ceil(self.data_len / self.batch_size) + return self.data_len + + def parsing_data(self, text_1, text_2, aspect_1, aspect_2, aspect2_1, aspect2_2): + ids = [] + target_aspect = [] # target Aspect Category tensor ids 저장 리스트 + target_aspect2 = [] # target 대분류 Aspect Category tensor ids 저장 리스트 (대분류 기준 성능 측정을 위함) + + # for i, s in enumerate(text): + # inputs = self.tokenizer.encode(s, add_special_tokens=False) + # input_len = len(inputs) + # ids.extend(inputs) + # target_aspect.extend([aspect[i]] * input_len) + # target_aspect2.extend([aspect2[i]] * input_len) + for i, (s1, s2) in enumerate(zip(text_1, text_2)): + inputs_1 = self.tokenizer.encode(s1, add_special_tokens=False) + inputs_2 = self.tokenizer.encode(s2, add_special_tokens=False) + inputs_1_len = len(inputs_1) + inputs_2_len = len(inputs_2) + ids.extend(inputs_1 + self.SEP_IDS + inputs_2) + target_aspect.extend([aspect_1[i]] * inputs_1_len + self.PADDING_TAG_IDS + [aspect_2[i]] * inputs_2_len) + target_aspect2.extend([aspect2_1[i]] * inputs_1_len + self.PADDING_TAG_IDS + [aspect2_2[i]] * inputs_2_len) + + # BERT가 처리할 수 있는 길이 (max_length)에 맞추어 slicing + ids = ids[:self.max_len - 2] + target_aspect = target_aspect[:self.max_len - 2] + target_aspect2 = target_aspect2[:self.max_len - 2] + + # SPECIAL TOKEN 추가 및 PADDING 수행 + ids = self.CLS_IDS + ids + self.SEP_IDS + target_aspect = self.PADDING_TAG_IDS + target_aspect + self.PADDING_TAG_IDS # CLS, SEP 태그 0 + target_aspect2 = self.PADDING_TAG_IDS + target_aspect2 + self.PADDING_TAG_IDS + + mask = [1] * len(ids) + token_type_ids = self.PAD_IDS * len(ids) + padding_len = self.max_len - len(ids) + ids = ids + (self.PAD_IDS * padding_len) + mask = mask + ([0] * padding_len) + + token_type_ids = token_type_ids + (self.PAD_IDS * padding_len) + target_aspect = target_aspect + (self.PADDING_TAG_IDS * padding_len) + target_aspect2 = target_aspect2 + (self.PADDING_TAG_IDS * padding_len) + + return { + "ids": torch.tensor(ids, dtype=torch.long), + "mask": torch.tensor(mask, dtype=torch.long), + "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long), + "target_aspect": torch.tensor(target_aspect, dtype=torch.long), + "target_aspect2": torch.tensor(target_aspect2, dtype=torch.long), + } + diff --git "a/ocr_tagging/src_ocr/data_manager/dataset/absa_\354\233\220\353\263\270.py" "b/ocr_tagging/src_ocr/data_manager/dataset/absa_\354\233\220\353\263\270.py" new file mode 100644 index 0000000..975b474 --- /dev/null +++ "b/ocr_tagging/src_ocr/data_manager/dataset/absa_\354\233\220\353\263\270.py" @@ -0,0 +1,164 @@ +from pathlib import Path +import os +import joblib +from sklearn import preprocessing +from sklearn.utils import column_or_1d +from collections import OrderedDict +import pandas as pd +import transformers +import torch +from torch.utils.data import IterableDataset +from utils.file_io import get_file_list, read_csv +from data_manager.parsers.label_unification.label_map import label_list, label_changing_rule +import math + + +# Encoder - Sentiment 및 Aspect 속성을 Encoding할 Label Encoder class +class MyLabelEncoder(preprocessing.LabelEncoder): + def fit(self, y): + y = column_or_1d(y, warn=True) + self.classes_ = pd.Series(y).unique() + return self + + +class Encoder: + def __init__(self, config, fp=None, extension="csv"): + self.fp = fp + self.file_list = get_file_list(fp, extension) + self.meta_data_fp = os.path.join(config.base_path + config.label_info_file) + + # Sentiment 속성을 위한 Encoder 및 Aspect Category를 위한 Encoder + self.enc_aspect, self.enc_aspect2 = None, None + self.aspect_labels, self.aspect2_labels = ['PAD', 'O'], ['PAD', 'O'] + + # 저장된 Encoder 유무를 확인 + # 있으면 load, 없으면 생성 + def check_encoder_fp(self): + meta_data_f = Path(self.meta_data_fp) + if meta_data_f.exists(): + meta_data = joblib.load(self.meta_data_fp) + self.enc_aspect, self.enc_aspect2 = meta_data["enc_aspect"], meta_data["enc_aspect2"] + else: + meta_data = self.set_encoder() + joblib.dump(meta_data, self.meta_data_fp) + + def get_encoder(self): + if self.enc_aspect is None or self.enc_aspect2 is None: + self.check_encoder_fp() + return self.enc_aspect, self.enc_aspect2 + + def set_encoder(self): + if len(self.file_list) == 0: + print(f"파일 경로 {self.fp}에 Encoding할 데이터가 존재하지 않습니다.") + raise FileExistsError() + + for now_fp in self.file_list: + df = read_csv(now_fp) + self.aspect_labels.extend(list(df["Aspect"].unique())) + + # Encoder 선언 및 fitting + self.enc_aspect, self.enc_aspect2 = MyLabelEncoder(), MyLabelEncoder() + self.aspect_labels = list(OrderedDict.fromkeys(self.aspect_labels)) + self.aspect2_labels.extend([label for label in label_list]) + + self.enc_aspect = self.enc_aspect.fit(self.aspect_labels) + self.enc_aspect2 = self.enc_aspect2.fit(self.aspect2_labels) + return {"enc_aspect": self.enc_aspect, "enc_aspect2": self.enc_aspect2} + + +class ABSADataset(IterableDataset): + def __init__(self, config, fp, enc_aspect, enc_aspect2, batch_size, data_len=0, extension="csv"): + self.data_len = data_len + self.file_list = get_file_list(fp, extension) + self.batch_size = batch_size + self.max_len = config.max_length + self.config = config + + # Encoder Setting + self.enc_aspect = enc_aspect + self.enc_aspect2 = enc_aspect2 + + # for embedding + self.tokenizer = transformers.BertTokenizer.from_pretrained(config.init_model_path, do_lower_case=False) + self.CLS_IDS = self.tokenizer.encode('[CLS]', add_special_tokens=False) # [2] + self.PAD_IDS = self.tokenizer.encode('[PAD]', add_special_tokens=False) # [0] + self.SEP_IDS = self.tokenizer.encode('[SEP]', add_special_tokens=False) # [3] + self.PADDING_TAG_IDS = [0] + self.s_len = 0 + + def __iter__(self): + # read data + for now_fp in self.file_list: + df = read_csv(now_fp) + df.loc[:, "Sentence #"] = df["Sentence #"].fillna(method="ffill") + df["Aspect2"] = df["Aspect"] + df = df.replace({"Aspect2": label_changing_rule}) + + df.loc[:, "Aspect"] = self.enc_aspect.transform(df[["Aspect"]]) + df.loc[:, "Aspect2"] = self.enc_aspect2.transform(df[["Aspect2"]]) + + sentences = df.groupby("Sentence #")["Word"].apply(list).values + aspects = df.groupby("Sentence #")["Aspect"].apply(list).values + aspects2 = df.groupby("Sentence #")["Aspect2"].apply(list).values + + for i in range(len(sentences)): + self.s_len += 1 + yield self.parsing_data(sentences[i], aspects[i], aspects2[i]) + + def __len__(self): + if self.data_len == 0: + self.data_len = self.get_length() + return self.data_len + + # data length를 계산 + def get_length(self): + if self.data_len > 0: + return self.data_len + else: + for now_fp in self.file_list: + df = read_csv(now_fp) + sentences = df.groupby("Sentence #")["Word"].apply(list).values + self.data_len += len(sentences) + self.data_len = math.ceil(self.data_len / self.batch_size) + return self.data_len + + def parsing_data(self, text, aspect, aspect2): + ids = [] + target_aspect = [] # target Aspect Category tensor ids 저장 리스트 + target_aspect2 = [] # target 대분류 Aspect Category tensor ids 저장 리스트 (대분류 기준 성능 측정을 위함) + + for i, s in enumerate(text): + inputs = self.tokenizer.encode(s, add_special_tokens=False) + input_len = len(inputs) + ids.extend(inputs) + target_aspect.extend([aspect[i]] * input_len) + target_aspect2.extend([aspect2[i]] * input_len) + + # BERT가 처리할 수 있는 길이 (max_length)에 맞추어 slicing + ids = ids[:self.max_len - 2] + target_aspect = target_aspect[:self.max_len - 2] + target_aspect2 = target_aspect2[:self.max_len - 2] + + # SPECIAL TOKEN 추가 및 PADDING 수행 + ids = self.CLS_IDS + ids + self.SEP_IDS + target_aspect = self.PADDING_TAG_IDS + target_aspect + self.PADDING_TAG_IDS # CLS, SEP 태그 0 + target_aspect2 = self.PADDING_TAG_IDS + target_aspect2 + self.PADDING_TAG_IDS + + mask = [1] * len(ids) + token_type_ids = self.PAD_IDS * len(ids) + padding_len = self.max_len - len(ids) + ids = ids + (self.PAD_IDS * padding_len) + mask = mask + ([0] * padding_len) + + token_type_ids = token_type_ids + (self.PAD_IDS * padding_len) + target_aspect = target_aspect + (self.PADDING_TAG_IDS * padding_len) + target_aspect2 = target_aspect2 + (self.PADDING_TAG_IDS * padding_len) + + return { + "ids": torch.tensor(ids, dtype=torch.long), + "mask": torch.tensor(mask, dtype=torch.long), + "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long), + "target_aspect": torch.tensor(target_aspect, dtype=torch.long), + "target_aspect2": torch.tensor(target_aspect2, dtype=torch.long), + } + diff --git a/ocr_tagging/src_ocr/data_manager/parsers/json2csv.py b/ocr_tagging/src_ocr/data_manager/parsers/json2csv.py index 6bac46a..85edd7c 100644 --- a/ocr_tagging/src_ocr/data_manager/parsers/json2csv.py +++ b/ocr_tagging/src_ocr/data_manager/parsers/json2csv.py @@ -1,148 +1,188 @@ -import pandas as pd import os +import csv import json -import re -from collections import Counter import kss -def making_result_fp(args, filename): - result_dir = args.save_p - os.makedirs(result_dir, exist_ok=True) - - filename, ext = os.path.splitext(filename) - result_fp = os.path.join(result_dir, f"{filename}.csv") - - return result_fp - -def preprocess_text(text): - return text.replace('\n', ' ') - -def split_content_into_sentences(content): - sentences = kss.split_sentences(content) - return [preprocess_text(sent.strip()) for sent in sentences if sent.strip()] - -def tag_sentence(sentence, topics): - words = sentence.split() - tags = ['O'] * len(words) - for topic in topics: - topic_text = preprocess_text(topic['text']) - topic_words = topic_text.split() - start_idx = 0 - while True: - idx = sentence.find(topic_text, start_idx) - if idx == -1: - break - end_idx = idx + len(topic_text) - word_idx = len(sentence[:idx].split()) - for j in range(word_idx, word_idx + len(topic_words)): - if j >= len(words): - break - if j == word_idx: - tags[j] = f"{'B-긍정' if topic['positive_yn'] == 'Y' else 'B-부정'},B-{topic['topic']},B-{topic['sentiment_scale']},B-{topic['topic_score']}" +def add_period_to_texts(text_list): # 문장 마침표 추가 함수 + texts_with_period = [text.strip() + '.' if not text.strip().endswith('.') else text.strip() for text in text_list] + return texts_with_period + + +def concat_find_bbox(text, topic_data, bbox_list): + cur_dict_list = [] + text_spacingx = text.replace('\n', '') + text_spacingx = text_spacingx.replace(' ', '') + text_spacingx = text_spacingx.strip() + + + start_pos_spacingx = 0 + end_pos_spacingx = 0 + for i in range(len(bbox_list)): + end_pos_spacingx = start_pos_spacingx + len(bbox_list[i]['text'].strip().replace(' ', '')) + bbox_list[i]['start_pos_spacingx'] = start_pos_spacingx + bbox_list[i]['end_pos_spacingx'] = end_pos_spacingx + start_pos_spacingx = end_pos_spacingx + + + + cur_pos = 0 + find_start_idx = 0 + + bbox_idx = 0 + + if (type(topic_data) == list): # 태깅 되어있는 경우 + topic_data = sorted([item for item in topic_data if isinstance(item, dict)], key=lambda x: len(x['text']), reverse=True) + new_topic_data = [] + for i in range(len(topic_data)): # len(topic_data) + text_to_find = topic_data[i]['text'] + new_start_pos = text.find(text_to_find) + new_end_pos = new_start_pos + len(text_to_find) + for j in range(len(new_topic_data)): # 검사하면서 new_pos들 업데이트해주는 부분 + if(new_topic_data[j]['start_pos'] <= new_start_pos < new_topic_data[j]['end_pos']): + new_start_pos = text.find(text_to_find,new_end_pos) + new_end_pos = new_start_pos + len(text_to_find) else: - tags[j] = f"{'I-긍정' if topic['positive_yn'] == 'Y' else 'I-부정'},I-{topic['topic']},I-{topic['sentiment_scale']},I-{topic['topic_score']}" - start_idx = end_idx - return tags - -def clean_data(our_topics): - if not our_topics: - return [] - - cleansed_topics = [] - for topic in our_topics: - if (not topic.get('text') - or not topic.get("topic") - or not topic.get("start_pos") - or not topic.get("end_pos") - or not topic.get("positive_yn") - or not topic.get("sentiment_scale") - or not topic.get("topic_score") - ): - continue - - cleansed_topics.append(topic) - - return cleansed_topics - -def process_json_file(file_path): - with open(file_path, 'r', encoding='utf-8-sig') as file: - data = json.load(file) - # print(f"현재 파일명: {file_path}") - rows = [] - sentence_counter = 1 - for item in data: - + continue + new_dict = {'text': topic_data[i]['text'], 'topic': topic_data[i]['topic'], 'start_pos': new_start_pos, 'end_pos': new_end_pos} + new_topic_data.append(new_dict) + new_topic_data = sorted([item for item in new_topic_data if isinstance(item, dict)], key=lambda x: x['start_pos'], reverse=False) - if 'our_topics' not in item or not item['our_topics']: - continue - - content = preprocess_text(item['content']) - sentences = split_content_into_sentences(content) - - # Add data cleansing about our_topics - our_topics = clean_data(item['our_topics']) - our_topics = sorted(our_topics, key=lambda x: len(x['text']), reverse=True) - - sent_idx = 0 - while sent_idx < len(sentences): - concat_sent = "" - for sent_concat_count in range(3, 0, -1): - if sent_idx + sent_concat_count > len(sentences): + for j in range(2 * len(new_topic_data) + 1): + if(j % 2 == 0 and j != 2*len(new_topic_data)): # 태깅 안 되어있는 case + start_pos = cur_pos + end_pos = new_topic_data[j//2]['start_pos'] + if(start_pos == end_pos): continue - concat_sent = " ".join(sentences[sent_idx:sent_idx+sent_concat_count]) - for topic in our_topics: - if preprocess_text(topic['text']) in concat_sent: - words = concat_sent.split() - tags = tag_sentence(concat_sent, our_topics) - for word, tag in zip(words, tags): - tag_parts = tag.split(',') - sentiment = tag_parts[0] if len(tag_parts) > 0 else 'O' - aspect = tag_parts[1] if len(tag_parts) > 1 else 'O' - sentiment_Score = tag_parts[2] if len(tag_parts) > 2 else '0' - aspect_score = tag_parts[3] if len(tag_parts) > 3 else '0' - rows.append([f"Sentence {sentence_counter}", word, sentiment, aspect, sentiment_Score, aspect_score]) - sentence_counter += 1 - sent_idx += sent_concat_count - break - else: + cur_dict = {'original_text' : text[start_pos:end_pos], 'topic' : 'O', 'bbox' : []} + if(cur_dict['original_text'] == ' ' or cur_dict['original_text'] == '\n'): + continue + cur_dict_list.append(cur_dict) + cur_pos = end_pos + elif(j % 2 == 0 and j == 2*len(new_topic_data)): # 태깅 안 되어있는 case 중 마지막 부분 분리 + start_pos = cur_pos + end_pos = len(text) + if(start_pos == end_pos): continue - break + cur_dict = {'original_text' : text[start_pos:end_pos], 'topic' : 'O', 'bbox' : []} + if(cur_dict['original_text'] == ' ' or cur_dict['original_text'] == '\n'): + continue + cur_dict_list.append(cur_dict) + cur_pos = end_pos + else: # 태깅되어 있는 데이터 처리 + start_pos = cur_pos + end_pos = new_topic_data[j//2]['end_pos'] + if (start_pos >= end_pos): + continue + cur_dict = {'original_text' : text[start_pos:end_pos], 'topic' : new_topic_data[j//2]['topic'], 'bbox' : []} + + + start_idx_found = text_spacingx.find(cur_dict['original_text'].strip().replace(' ', '').replace('\n', '') , find_start_idx) + end_idx_found = start_idx_found + len(cur_dict['original_text'].strip().replace(' ', '').replace('\n', '')) + + + while (not(bbox_list[bbox_idx]['start_pos_spacingx'] <= start_idx_found <= (bbox_list[bbox_idx]['end_pos_spacingx'] - 1))): + bbox_idx += 1 + bbox_start_idx = bbox_idx + while (not(bbox_list[bbox_idx]['start_pos_spacingx'] <= (end_idx_found - 1) <= (bbox_list[bbox_idx]['end_pos_spacingx'] - 1))): + bbox_idx += 1 + bbox_end_idx = bbox_idx + + + for idx in range(bbox_start_idx, bbox_end_idx + 1): + cur_dict['bbox'].append(bbox_list[idx]['bbox']) + + + find_start_idx = end_idx_found + + cur_dict_list.append(cur_dict) + cur_pos = end_pos + + + else: # 태깅 안 되어있는 경우 + cur_dict = {'original_text' : text, 'topic' : 'O', 'bbox' : []} + cur_dict_list.append(cur_dict) + + return cur_dict_list + + + +def preprocessing_ocr(args): + sentence_dict_list = [] + directory = args.fp + + file_list = os.listdir(directory) + + file_list.sort() + + for filename in file_list: + if filename.endswith('.json'): # JSON 파일인지 확인 + filepath = os.path.join(directory, filename) + with open(filepath, 'r', encoding='utf-8-sig') as file: + ocr_list = json.load(file) # JSON 파일 불러오기 + for n in range(len(ocr_list)): + text = ocr_list[n][0] + topic_data = ocr_list[n][1] + topic_data = sorted([item for item in ocr_list[n][1] if isinstance(item, dict)], key=lambda x: x['start_pos'], reverse=False) + bbox_list = ocr_list[n][2:] + sentence_dict_list.extend(concat_find_bbox(text, topic_data, bbox_list)) + print(f"{filename} Processed") + + + split_sentence_dict_list = [] + + + for i in range(len(sentence_dict_list)): # 전체 텍스트에 대한 리스트 원소 개수 + for j in range(len(sentence_dict_list[i]['original_text'].split('\n'))): # 하나의 원본 텍스트에 대해서 분리한 문장의 개수 + if(sentence_dict_list[i]['original_text'].split('\n')[j].strip() != ''): + split_sentence_dict_list.append({'original_text': sentence_dict_list[i]['original_text'].split('\n')[j], + 'topic': sentence_dict_list[i]['topic'], + 'bbox': sentence_dict_list[i]['bbox']}) + + for i in range(len(split_sentence_dict_list)): # 분리한 텍스트의 전체 개수 + split_sentence_dict_list[i]['original_text'] = split_sentence_dict_list[i]['original_text'].strip() + split_sentence_dict_list[i]['words'] = split_sentence_dict_list[i]['original_text'].split() + + + final_sentence_dict_list = [] + sentence_count = 0 + word_count = 0 + + # 태깅 부분 + for i in range(len(split_sentence_dict_list)): + sentence_count += 1 + for j in range(len(split_sentence_dict_list[i]['words'])): + word_count += 1 + if(split_sentence_dict_list[i]['topic'] != 'O' and j == 0): + topic = 'B-'+split_sentence_dict_list[i]['topic'] + elif(split_sentence_dict_list[i]['topic'] != 'O' and j != 0): + topic = 'I-'+split_sentence_dict_list[i]['topic'] else: - concat_sent = sentences[sent_idx] - words = concat_sent.split() - tags = tag_sentence(concat_sent, our_topics) - for word, tag in zip(words, tags): - tag_parts = tag.split(',') - sentiment = tag_parts[0] if len(tag_parts) > 0 else 'O' - aspect = tag_parts[1] if len(tag_parts) > 1 else 'O' - sentiment_Score = tag_parts[2] if len(tag_parts) > 2 else '0' - aspect_score = tag_parts[3] if len(tag_parts) > 3 else '0' - rows.append([f"Sentence {sentence_counter}", word, sentiment, aspect, sentiment_Score, aspect_score]) - sentence_counter += 1 - sent_idx += 1 - - df = pd.DataFrame(rows, columns=['Sentence #', 'Word', 'Sentiment', 'Aspect', 'Sentiment_Score', 'Aspect_Score']) - return df - -def process_json_files_in_folder(now_path, result_path): - json_file_path = now_path - output_csv_path = result_path - - df = process_json_file(json_file_path) - df.to_csv(output_csv_path, index=False) - print(f"Processed and saved as {output_csv_path}") + topic = split_sentence_dict_list[i]['topic'] + + + row_dict = {'Sentence #': 'Sentence '+ str(sentence_count), + 'Word': split_sentence_dict_list[i]['words'][j], + 'Aspect': topic, + "Bbox": split_sentence_dict_list[i]['bbox']} + final_sentence_dict_list.append(row_dict) + + print("단어 개수:", word_count) + print("문장 개수:", sentence_count) + return final_sentence_dict_list + def json_2_csv(args): - json_list=os.listdir(args.fp) - result_path=[] - now_path=[] - for file_name in json_list: - if file_name.endswith(".json"): - now_path.append(os.path.join(args.fp, file_name)) - result_fp = making_result_fp(args, file_name) - result_path.append(result_fp) - - for a, b in zip(now_path, result_path): - process_json_files_in_folder(a, b) \ No newline at end of file + if not os.path.exists(args.save_p): + os.makedirs(args.save_p) + + csv_file = os.path.join(args.save_p, 'output.csv') + + final_sent_dict_list = preprocessing_ocr(args) + + with open(csv_file, mode = 'w', newline='', encoding='utf-8-sig') as file: + writer = csv.DictWriter(file, fieldnames = final_sent_dict_list[0].keys()) + writer.writeheader() + for row_dict in final_sent_dict_list: + writer.writerow(row_dict) diff --git a/ocr_tagging/src_ocr/data_manager/parsers/label_unification/label_map.py b/ocr_tagging/src_ocr/data_manager/parsers/label_unification/label_map.py index a7b8589..f1bec71 100644 --- a/ocr_tagging/src_ocr/data_manager/parsers/label_unification/label_map.py +++ b/ocr_tagging/src_ocr/data_manager/parsers/label_unification/label_map.py @@ -1,20 +1,15 @@ # 대분류 Asepct Category 매핑 Dictionary # value (list) 내에 포함된 label을 key 값으로 변경 label_map_dict = { - "디자인": ["커스터마이징", "그립감", "색감", "로고없음", "재질"], - "안전": ["인증", "발열", "과충전방지", "과전류"], - "서비스": ["AS", "환불", "문의", "교환", "수리", "보험", "배송"], - - "기능": ["멀티포트", "거치", "부착", "디스플레이", "잔량표시", "충전표시",], - - "충전": ["고속충전", "동시충전","저전력", "무선충전", "맥세이프", "배터리충전속도",], - - "휴대성": ["사이즈", "무게"], - - "기타":["기내반입", "수명", "친환경", "구성품", "케이블", "파우치", "케이스"], - "배터리를충전하는호환성":["호환성"], + "디자인": ["커스터마이징", "그립감", "색감", "로고없음", "재질","디자인"], + "안전": ["인증", "발열", "과충전방지", "과전류","안전"], + "서비스": ["AS", "환불", "문의", "교환", "수리", "보험", "배송","서비스", "배송/포장/발송"], + "기능": ["멀티포트", "거치", "부착", "디스플레이", "잔량표시", "충전표시","기능"], + "충전": ["고속충전", "동시충전","저전력", "무선충전", "맥세이프", "배터리충전속도","충전"], + "휴대성": ["사이즈", "무게","휴대성"], + "기타":["기내반입", "수명", "친환경", "구성품", "케이블", "파우치", "케이스","기타"], + "배터리를충전하는호환성":["호환성","배터리를충전하는호환성"], "배터리용량":["배터리용량"], - "제조/유통/서비스":["제조일/제조사", "서비스", "유통기한"] } diff --git a/ocr_tagging/src_ocr/data_manager/parsers/split_csv.py b/ocr_tagging/src_ocr/data_manager/parsers/split_csv.py index 728d7eb..759a309 100644 --- a/ocr_tagging/src_ocr/data_manager/parsers/split_csv.py +++ b/ocr_tagging/src_ocr/data_manager/parsers/split_csv.py @@ -7,11 +7,8 @@ def print_dist(d, flag): print(d.head()) asp = pd.DataFrame([dict(Counter(d["Aspect"]))]).T - pol = pd.DataFrame([dict(Counter(d["Sentiment"]))]).T print("\n" + '*' * 50) print("About {example}\t".format(example=flag)) - print("<감정 레이블 분포>") - print(pol) print("<속성 레이블 분포>") print(asp) @@ -29,23 +26,20 @@ def file_split(args): if not os.path.exists(os.path.join(args.save_p, p)): os.makedirs(os.path.join(args.save_p, p)) print(file_list) - total_df = pd.read_csv(file_list[0]) - for file in file_list[1:]: - last_num=total_df['Sentence #'].iloc[-1] - last_num=int(last_num.replace("Sentence ","")) - df=pd.read_csv(file) - Sentence_list=df['Sentence #'].unique() - for index, sentence in enumerate(Sentence_list, start=last_num+1): - df.loc[df['Sentence #'] == sentence, 'Sentence #'] = "Sentence "+str(index) - total_df=pd.concat([total_df, df]) + + + df = pd.read_csv(file_list[0], encoding=args.encoding) + df.loc[:, "Sentence #"] = df["Sentence #"].fillna(method="ffill") # 결측치 fill + + # Sentnece ID를 기준으로 group화하여 test set 랜덤 추출 test_split = GroupShuffleSplit(test_size=args.test_ratio, n_splits=1, - random_state=42).split(total_df, groups=total_df['Sentence #']) + random_state=42).split(df, groups=df['Sentence #']) train_val_idxs, test_idxs = next(test_split) - train_val = total_df.iloc[train_val_idxs] - test = total_df.iloc[test_idxs] + train_val = df.iloc[train_val_idxs] + test = df.iloc[test_idxs] # Sentnece ID를 기준으로 group화하여 validation set 랜덤 추출 val_split = GroupShuffleSplit(test_size=args.val_ratio, diff --git a/ocr_tagging/src_ocr/do_train.py b/ocr_tagging/src_ocr/do_train.py index 6d699ac..56a7e21 100644 --- a/ocr_tagging/src_ocr/do_train.py +++ b/ocr_tagging/src_ocr/do_train.py @@ -104,7 +104,7 @@ def train(config): test_loss = eval_fn( valid_data_loader, model, - enc_aspect2, + enc_aspect, device, log ) @@ -130,8 +130,8 @@ def train(config): help="각 Aspect Category input sample의 size") parser.add_argument("--stop_patience", type=int, default=3, help="validation loss를 기준으로 성능이 증가하지 않는 " "epoch을 몇 번이나 허용할 것인지 설정") - parser.add_argument("--train_fp", type=str, default="./resources_ocr/data/train/", help="학습 데이터들이 포함된 디렉토리 경로 or 학습 데이터 파일 경로 설정") - parser.add_argument("--valid_fp", type=str, default="./resources_ocr/data/valid/", help="검증 데이터들이 포함된 디렉토리 경로 or 검증 데이터 파일 경로 설정") + parser.add_argument("--train_fp", type=str, default="./ocr_tagging/resources_ocr/parsing_data/train/", help="학습 데이터들이 포함된 디렉토리 경로 or 학습 데이터 파일 경로 설정") + parser.add_argument("--valid_fp", type=str, default="./resources_ocr/parsing_data/valid/", help="검증 데이터들이 포함된 디렉토리 경로 or 검증 데이터 파일 경로 설정") parser.add_argument("--base_path", type=str, help="Model이나 Encoder를 저장할 경로 설정", default="./ckpt_ocr/model/") parser.add_argument("--label_info_file", type=str, help="Encoder의 저장 파일명", default="meta.bin") parser.add_argument("--out_model_path", type=str, help="model의 저장 파일명", default="pytorch_model.bin") diff --git a/ocr_tagging/src_ocr/modeling/trainer.py b/ocr_tagging/src_ocr/modeling/trainer.py index 5e6580e..ea0e97f 100644 --- a/ocr_tagging/src_ocr/modeling/trainer.py +++ b/ocr_tagging/src_ocr/modeling/trainer.py @@ -49,7 +49,7 @@ def eval_fn(data_loader, model, enc_aspect, device, log, f1_mode='micro', flag=' for data in tqdm(data_loader, total=loader_len): data = parsing_batch(data, device) loss, _, predict_aspect = model(**data) - aspect_label = data['target_aspect2'].cpu().numpy().reshape(-1) + aspect_label = data['target_aspect'].cpu().numpy().reshape(-1) aspect_pred = np.array(predict_aspect).reshape(-1)