Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
GandalFran committed Dec 18, 2020
2 parents 13a92e1 + 5dbafbb commit 4af1303
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 85 deletions.
Binary file added docs/Oscar.docx
Binary file not shown.
15 changes: 0 additions & 15 deletions soa-api/src/soa/api/topics_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,6 @@

from soa.run import api

# API model for a single tweet: its URL, the extracted text and the sentiment label,
# all declared as string fields.
tweet_model = api.model("Tweet information", {
'url': fields.String(example='https://twitter.com/NASA/status/967824267948773377', description='URL to the tweet'),
'text': fields.String(example='This is a tweet about COVID #covid-19', description='Text of the tweet extracted'),
'sentiment': fields.String(example='positive', description='Sentiment result of text analysis'),
}, description='Information of Tweet data in the API.')

# API model for one topic entry: the topic name plus the tweets found for it,
# nested as `tweet_model` with as_list=True.
topics_model_2 = api.model("Topics subinformation", {
'name': fields.String(example='covid AND christmas AND famous', description='Theme or topic of the tweets extracted'),
'tweets': fields.Nested(tweet_model, description='Tweets retrieved from the topics given.', as_list=True)
}, description='Information of Topics subdata in the API.')

# Top-level topics model: a single key 't1' holding `topics_model_2` entries (as_list=True).
# NOTE(review): only the literal key 't1' is declared here -- confirm whether further
# topic keys are expected in the response.
topics_model_1 = api.model("Topics main information", {
't1': fields.Nested(topics_model_2, description='Tweets retrieved from the topics given.', as_list=True)
}, description='Information of Topics main data in the API.')

source_model = api.model("Source News information", {
'id': fields.String(example='usa-today', description='Id of the newspaper source of the news.'),
'name': fields.String(example='USA Today', description='Name of the newspaper source of the news.')
Expand Down
7 changes: 4 additions & 3 deletions soa-api/src/soa/models/covid_model_eu.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _do_request(self, uri: str) -> pd.DataFrame:
uri (:obj:`str`): dataset's URI.
Returns:
:obj:`pd.DataFrame` result from the request, containing the COVID information worldwide per country.
:obj:`pd.DataFrame`: result from the request, containing the COVID information worldwide per country.
"""

try:
Expand Down Expand Up @@ -67,11 +67,12 @@ def _format_response(self, response:list) -> list:

transform_country = lambda x: ' '.join(x.split('_')).replace('(',' ').replace(')',' ')

print(response)
return [{
'date': r['dateRep'].isoformat(),
'country': transform_country(r['countriesAndTerritories']),
'cases': r['cases'],
'cases': r['deaths']
'cases': r['cases_weekly'],
'cases': r['deaths_weekly']
} for r in response]

def extract(self, from_date=None, to_date=None):
Expand Down
2 changes: 1 addition & 1 deletion soa-api/src/soa/models/location_iq_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def get_coordinates(self, place:str, max_tries:int=10, time_between_tries:int=1)
time_between_tries (:int, optional): Time between tries to obtain location info.
Returns:
:obj:`dict` containing the information in matter of geocoding of the given place.
:obj:`dict`: containing the information in matter of geocoding of the given place.
"""

for i in range(max_tries+1):
Expand Down
30 changes: 15 additions & 15 deletions soa-api/src/soa/models/news_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ def get_news_top_headlines(self,
Retrieve top news in a country specified and containing a keyword given in a query
Arguments:
query (:obj:`str`) -- keyword to find in top news
country_code (:obj:`str`) -- two letter code to specify the country (default: {DEFAULT_NEWS_COUNTRY})
query (:obj:`str`): keyword to find in top news
country_code (:obj:`str`): two letter code to specify the country (default: {DEFAULT_NEWS_COUNTRY})
Returns:
:obj:`list` -- list of dictionaries containing information about the news retrieved
:obj:`list` of :obj:`dict`: list of dictionaries containing information about the news retrieved
"""

# Empty list to store parsed news
Expand Down Expand Up @@ -77,14 +77,14 @@ def get_news_everything(self,
Retrieve all news within a range of time, in a specific language, whose title contains the given keyword
Arguments:
q (:obj:`str`) -- keyword to find in the title of news
from_date (:obj:`str`, optional) -- beginning date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
to_date (:obj:`str`, optional) -- end date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
lang (:obj:`str`, optional) -- language of the news (default: {DEFAULT_NEWS_LANGUAGE})
count (:obj:`int`, optional) -- number of news to retrieve (default: {DEFAULT_NUM_NEWS_EXTRACTED})
q (:obj:`str`): keyword to find in the title of news
from_date (:obj:`str`, optional): beginning date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
to_date (:obj:`str`, optional): end date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
lang (:obj:`str`, optional): language of the news (default: {DEFAULT_NEWS_LANGUAGE})
count (:obj:`int`, optional): number of news to retrieve (default: {DEFAULT_NUM_NEWS_EXTRACTED})
Returns:
:obj:`list` -- list of dictionaries containing information about the news retrieved
:obj:`list` of :obj:`dict`: list of dictionaries containing information about the news retrieved
"""

# Empty list to store parsed news
Expand Down Expand Up @@ -128,10 +128,10 @@ def _format_title(self, title: str) -> str:
Write title in the proper format
Arguments:
title (:obj:`str`) -- title to be formatted
title (:obj:`str`): title to be formatted
Returns:
:obj:`str` title with the proper format
:obj:`str`: title with the proper format
"""

title_list = []
Expand All @@ -150,10 +150,10 @@ def _format_date(self, date_hour: str) -> str:
Write date in the proper format
Arguments:
date_hour (:obj:`str`) -- date to be formatted
date_hour (:obj:`str`): date to be formatted
Returns:
:obj:`str` date with the proper format
:obj:`str`: date with the proper format
"""

date_list = []
Expand All @@ -172,10 +172,10 @@ def __clean_data(self, text: str) -> str:
Processes data and cleans it as entry for the vectorizer
Arguments:
text (:obj:`str`) -- text preprocessed
text (:obj:`str`): text preprocessed
Returns:
(:obj:`str`) -- cleaned and postprocessed text
:obj:`str`: cleaned and postprocessed text
"""
if text is not None:
# Remove punctuation
Expand Down
11 changes: 6 additions & 5 deletions soa-api/src/soa/models/sentiment_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ def clean_analyzer(self, text: str):
so that it only contains words and spaces between them.
Args:
text: This is the string that contains the tweet received from Twitter
text (:obj:`str`): This is the string that contains the tweet received from Twitter
Returns:
string: postcleaning text string (only words and spaces)
:obj:`str`: postcleaning text string (only words and spaces)
"""

# IMPORTANT: in Spanish sentiment analysis it is important to keep orthographic accents
Expand Down Expand Up @@ -82,13 +83,13 @@ def analyze(self, text: str, clean_text: bool = False) -> Dict:
"""Sentiment analyzer of text
Arguments:
text {str} -- sentence to analyze
text (:obj:`str`):sentence to analyze
Keyword Arguments:
clean_text {bool} -- flag indicating if the text must be cleaned or not (default: {False})
clean_text (:obj:`bool`, optional): flag indicating if the text must be cleaned or not (default: {False})
Returns:
str -- sentiment result of the analyzer
:obj:`str`: sentiment result of the analyzer
"""
sentiment_result = ""

Expand Down
18 changes: 9 additions & 9 deletions soa-api/src/soa/models/topic_modelling_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, num_topics: int = NUM_TOPICS):
Extracts topics from a text using LDA (Latent Dirichlet Allocation) to get themes from main keywords in multiple documents
Keyword Arguments:
num_topics (:obj:`int`, optional) -- number of topics to extract from text (default: {NUM_TOPICS})
num_topics (:obj:`int`, optional): number of topics to extract from text (default: {NUM_TOPICS})
"""
# Create model component handler
self._lda = LDA(n_components=num_topics)
Expand All @@ -35,10 +35,10 @@ def __clean_data(self, text: str) -> str:
Processes data and cleans it as entry for the vectorizer
Arguments:
text (:obj:`str`) -- text preprocessed
text (:obj:`str`): text preprocessed
Returns:
(:obj:`str`) -- cleaned and postprocessed text
:obj:`str`: cleaned and postprocessed text
"""
# Remove punctuation
text_processed = re.sub('[,\.!?]', '', text)
Expand All @@ -53,7 +53,7 @@ def __vectorize_data(self, text: str):
Transforms text into a vector of 0s and 1s as entry of the LDA model.
Arguments:
text (:obj:`str`) -- text cleaned and postprocessed
text (:obj:`str`): text cleaned and postprocessed
"""
# Fit and transform the processed titles
vec_data = self._count_vectorizer.fit_transform([text])
Expand All @@ -65,8 +65,8 @@ def get_topics(self, text: str, words_per_topic: int = NUM_WORDS) -> dict:
Returns the number of topics predicted in the text given.
Arguments:
text (:obj:`str`) -- [description]
words_per_topic (:obj:`int`, optional) -- [description] (default: {NUM_WORDS})
text (:obj:`str`): text to get topics from
words_per_topic (:obj:`int`, optional): number of words to extract per topic (default: {NUM_WORDS})
Return:
:obj:`dict`: structure containing the list of topics extracted from a text given
Expand Down Expand Up @@ -95,9 +95,9 @@ def __summary_topics(self, model, vectorizer, n_top_words: int) -> list:
Gets number of topics predicted and stores them into a list
Arguments:
model -- pretrained LDA model
vectorizer -- pretrained Count Vectorizer vectorizer
n_top_words (:obj:`int`, optional) -- number of words to analyze from each topic
model: pretrained LDA model
vectorizer: pretrained Count Vectorizer vectorizer
n_top_words (:obj:`int`, optional): number of words to analyze from each topic
Returns:
:obj:`list` of :obj:`str`: list of topics extracted
Expand Down
74 changes: 37 additions & 37 deletions soa-api/src/soa/models/twitter_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@ def get_tweets_single_query(self,
"""Retrieve tweets containing a keyword given in a query
Arguments:
query {str} -- keyword to find in tweets
query (:obj:`str`): keyword to find in tweets
Keyword Arguments:
count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang {str} -- language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
Returns:
:obj:`list` -- list of dictionaries containing information about the tweets retrieved
:obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
"""

# Empty list to store parsed tweets
Expand Down Expand Up @@ -94,17 +94,17 @@ def get_tweets_multiple_query(self,
"""Retrieve tweets containing a keyword given in a query
Arguments:
query {List[str]} -- list to find in tweets
query (:obj:`list` of :obj:`str`): list to find in tweets
Keyword Arguments:
count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang {str} -- language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
include_both {bool} -- flag indicating if the tweets to retrieve will only contain all the keywords in the query or not (default: {False})
count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
include_both (:obj:`bool`, optional): flag indicating if the tweets to retrieve will only contain all the keywords in the query or not (default: {False})
Returns:
:obj:`list` -- list of dictionaries containing information about the tweets retrieved
:obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
"""
multiple_query = ""
separator = " OR "
Expand Down Expand Up @@ -138,16 +138,16 @@ def get_tweets_with_bearer_token(self,
Twitter API to retrieve more tweets and more info
Arguments:
query {str} -- list to find in tweets
query (:obj:`str`): list to find in tweets
Keyword Arguments:
count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang {str} -- language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
Returns:
:obj:`list` -- list of dictionaries containing information about the tweets retrieved
:obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
"""
tweets = []

Expand Down Expand Up @@ -201,17 +201,17 @@ def get_tweets_multiple_with_bearer_token(self,
Twitter API to retrieve more tweets and more info
Arguments:
query {List[str]} -- list to find in tweets
query (:obj:`list` of :obj:`str`): list to find in tweets
Keyword Arguments:
count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang {str} -- language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
include_both {bool} -- flag indicating if the tweets to retrieve will only contain all the keywords in the query or not (default: {False})
count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
include_both (:obj:`bool`, optional): flag indicating if the tweets to retrieve will only contain all the keywords in the query or not (default: {False})
Returns:
:obj:`list` -- list of dictionaries containing information about the tweets retrieved
:obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
"""

multiple_query = ""
Expand Down Expand Up @@ -240,13 +240,13 @@ def extract_text(self, obj: dict, clean_text: bool = False) -> str:
"""Extracts text from tweet object status
Arguments:
obj {dict} -- status with information about a tweet from Twitter API
obj (:obj:`dict`): status with information about a tweet from Twitter API
Keyword Arguments:
clean_text {bool} -- flag indicating if the text of the tweet must be processed or not (default: {False})
clean_text (:obj:`bool`, optional): flag indicating if the text of the tweet must be processed or not (default: {False})
Returns:
str -- text of the tweet
:obj:`str`: text of the tweet
"""
text = obj.full_text.encode('utf-8').decode('utf-8')
if clean_text:
Expand All @@ -257,10 +257,10 @@ def extract_url(self, obj: dict) -> str:
"""Extracts url from tweet object status
Arguments:
obj {dict} -- status with information about a tweet from Twitter API
obj (:obj:`dict`): status with information about a tweet from Twitter API
Returns:
str -- url of the tweet
:obj:`str`: url of the tweet
"""
url = "https://twitter.com/twitter/statuses/" + str(obj.id)
return url
Expand All @@ -269,31 +269,31 @@ def extract_date_of_creation(self, obj: dict) -> str:
"""Extracts date and time from tweet object status
Arguments:
obj {dict} -- status with information about a tweet from Twitter API
obj (:obj:`dict`): status with information about a tweet from Twitter API
Returns:
str -- tweet's publishing date and time
:obj:`str`: tweet's publishing date and time
"""
return obj.created_at.strftime('%Y-%m-%dT%H:%M:%S')

def extract_geolocation(self, obj: dict) -> str:
    """Extracts country and city information from tweet object status

    Arguments:
        obj (:obj:`dict`): status with information about a tweet from Twitter API

    Returns:
        :obj:`str`: tweet's location, i.e. the ``geo`` attribute of the status, returned unchanged
    """
    # NOTE(review): the status is read by attribute and the value is passed through
    # without conversion, so the `dict` / `str` annotations look nominal -- confirm
    # the real types against the Twitter client in use.
    return obj.geo

def extract_coordinates(self, obj: dict) -> str:
    """Extracts location coordinates from tweet object status

    Arguments:
        obj (:obj:`dict`): status with information about a tweet from Twitter API

    Returns:
        :obj:`str`: tweet's coordinates, i.e. the ``coordinates`` attribute of the status, returned unchanged
    """
    # NOTE(review): the value is passed through as-is; the original docstring describes
    # it as "(lat - long) format" -- confirm the ordering and type against the Twitter
    # client in use.
    return obj.coordinates

0 comments on commit 4af1303

Please sign in to comment.