From abf9bedb751db49493e66b89c62fe73711314393 Mon Sep 17 00:00:00 2001 From: shweht Date: Sun, 6 Dec 2020 23:59:32 -0500 Subject: [PATCH] make truncation fix --- sentiment/bert_eval.py | 2 +- sentiment/sentiment.py | 3 +- sentiment/sentiment_helpers.py | 64 ++++++++++------------------------ 3 files changed, 21 insertions(+), 48 deletions(-) diff --git a/sentiment/bert_eval.py b/sentiment/bert_eval.py index d13951e..3c3960a 100644 --- a/sentiment/bert_eval.py +++ b/sentiment/bert_eval.py @@ -50,7 +50,7 @@ def load(self, path:str): Takes in a tweet and calculates a sentiment prediction confidences """ def score(self, text): - encoding = self.tokenizer(text, return_tensors="pt", padding=True) + encoding = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=35) inputs = encoding["input_ids"].to(self.device) logits = self.model(inputs, labels=None)[0] temp = torch.flatten(logits.cpu()) diff --git a/sentiment/sentiment.py b/sentiment/sentiment.py index e8a4d89..ba65127 100644 --- a/sentiment/sentiment.py +++ b/sentiment/sentiment.py @@ -35,8 +35,7 @@ bert = BertSentiment(config.model_path) #Initialize elasticsearch settings -print(config.elasticsearch_verify_certs) -es = Elasticsearch(hosts=[config.elasticsearch_host], +es = Elasticsearch(hosts=[config.elasticsearch_host], verify_certs=config.elasticsearch_verify_certs, timeout=config.elasticsearch_timeout_secs) diff --git a/sentiment/sentiment_helpers.py b/sentiment/sentiment_helpers.py index bdebd2b..219aeef 100644 --- a/sentiment/sentiment_helpers.py +++ b/sentiment/sentiment_helpers.py @@ -1,47 +1,6 @@ import re def get_query(): -# query = { -# "_source": [ -# "text", -# "full_text", -# "extended_tweet.full_text", -# "quoted_status.text", -# "quoted_status.full_text", -# "quoted_status.extended_tweet.full_text" -# ], -# "query": { -# "bool": { -# "filter": [ -# { -# "bool": { -# "must_not": [ -# { -# "exists": { -# "field": "sentiment.vader.primary" -# } -# }, -# { -# "exists": { -# "field": "sentiment.bert.scores" -# } -# } -# ] -# } -# }, -# { -# "bool": { -# "must_not": { -# "exists": { -# "field": "retweeted_status.id" -# } -# } -# } -# } -# ] -# } -# } -# } query = { "_source": [ "text", @@ -56,11 +15,26 @@ def get_query(): "filter": [ { "bool": { - "must_not": { - "exists": { - "field": "sentiment.vader.primary" + "should": [{ + "bool": { + "must_not": { + "exists": { + "field": "sentiment.vader.primary" + } + } } - } + }, + { + "bool": { + "must_not": { + "exists": { + "field": "sentiment.bert.class" + } + } + } + } + ], + "minimum_should_match" : 1 } }, {