diff --git a/docs/Oscar.docx b/docs/Oscar.docx
new file mode 100644
index 0000000..fa9043d
Binary files /dev/null and b/docs/Oscar.docx differ
diff --git a/soa-api/src/soa/api/topics_models.py b/soa-api/src/soa/api/topics_models.py
index 4a3ec15..8962c83 100644
--- a/soa-api/src/soa/api/topics_models.py
+++ b/soa-api/src/soa/api/topics_models.py
@@ -8,21 +8,6 @@
 
 from soa.run import api
 
-tweet_model = api.model("Tweet information", {
-    'url': fields.String(example='https://twitter.com/NASA/status/967824267948773377', description='URL to the tweet'),
-    'text': fields.String(example='This is a tweet about COVID #covid-19', description='Text of the tweet extracted'),
-    'sentiment': fields.String(example='positive', description='Sentiment result of text analysis'),
-}, description='Information of Tweet data in the API.')
-
-topics_model_2 = api.model("Topics subinformation", {
-    'name': fields.String(example='covid AND christmas AND famous', description='Theme or topic of the tweets extracted'),
-    'tweets': fields.Nested(tweet_model, description='Tweets retrieved from the topics given.', as_list=True)
-}, description='Information of Topics subdata in the API.')
-
-topics_model_1 = api.model("Topics main information", {
-    't1': fields.Nested(topics_model_2, description='Tweets retrieved from the topics given.', as_list=True)
-}, description='Information of Topics main data in the API.')
-
 source_model = api.model("Source News information", {
     'id': fields.String(example='usa-today', description='Id of the newspaper source of the news.'),
     'name': fields.String(example='USA Today', description='Name of the newspaper source of the news.')
diff --git a/soa-api/src/soa/models/covid_model_eu.py b/soa-api/src/soa/models/covid_model_eu.py
index 2a97cd2..70a0a29 100644
--- a/soa-api/src/soa/models/covid_model_eu.py
+++ b/soa-api/src/soa/models/covid_model_eu.py
@@ -18,7 +18,7 @@ def _do_request(self, uri: str) -> pd.DataFrame:
             uri (:obj:`str`): dataset's URI.
 
         Returns:
-            :obj:`pd.DataFrame` result from the request, contianing the COVID information worldwide per country.
+            :obj:`pd.DataFrame`: result from the request, containing the COVID information worldwide per country.
         """
 
         try:
@@ -67,11 +67,12 @@ def _format_response(self, response:list) -> list:
 
         transform_country = lambda x: ' '.join(x.split('_')).replace('(',' ').replace(')',' ')
 
+        print(response)
         return [{
             'date': r['dateRep'].isoformat(),
             'country': transform_country(r['countriesAndTerritories']),
-            'cases': r['cases'],
-            'cases': r['deaths']
+            'cases': r['cases_weekly'],
+            'deaths': r['deaths_weekly']
         } for r in response]
 
     def extract(self, from_date=None, to_date=None):
diff --git a/soa-api/src/soa/models/location_iq_model.py b/soa-api/src/soa/models/location_iq_model.py
index 3492da7..2236079 100644
--- a/soa-api/src/soa/models/location_iq_model.py
+++ b/soa-api/src/soa/models/location_iq_model.py
@@ -26,7 +26,7 @@ def get_coordinates(self, place:str, max_tries:int=10, time_between_tries:int=1)
             time_between_tries (:int, optional): Time between tries to obtain location info.
 
         Returns:
-            :obj:`dict` contanining the information in matter of geocoding of the given place.
+            :obj:`dict`: containing the geocoding information of the given place.
         """
 
         for i in range(max_tries+1):
diff --git a/soa-api/src/soa/models/news_model.py b/soa-api/src/soa/models/news_model.py
index 33cf3a6..0862238 100644
--- a/soa-api/src/soa/models/news_model.py
+++ b/soa-api/src/soa/models/news_model.py
@@ -23,11 +23,11 @@ def get_news_top_headlines(self,
         Retrieve top news in a country specified and containing a keyword given in a query
 
         Arguments:
-            query (:obj:`str`) -- keyword to find in top news
-            country_code (:obj:`str`) -- two letter code to specify the country (default: {DEFAULT_NEWS_COUNTRY})
+            query (:obj:`str`): keyword to find in top news
+            country_code (:obj:`str`): two-letter code to specify the country (default: {DEFAULT_NEWS_COUNTRY})
 
         Returns:
-            :obj:`list` -- list of dictionaries containing information about the news retrieved
+            :obj:`list` of :obj:`dict`: list of dictionaries containing information about the news retrieved
         """
 
         # Empty list to store parsed news
@@ -77,14 +77,14 @@ def get_news_everything(self,
         Retrieve every news in a range of time, in an specific language and containing in their title a keyword given
 
         Arguments:
-            q (:obj:`str`) -- keyword to find in the title of news
-            from_date (:obj:`str`, optional) -- beginning date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            to_date (:obj:`str`, optional) -- end date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            lang (:obj:`str`, optional) -- language ot the news (default: {DEFAULT_NEWS_LANGUAGE})
-            count (:obj:`int`, optional) -- number of news to retrieve (default: {DEFAULT_NUM_NEWS_EXTRACTED})
+            q (:obj:`str`): keyword to find in the title of news
+            from_date (:obj:`str`, optional): beginning date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            to_date (:obj:`str`, optional): end date point to retrieve news (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            lang (:obj:`str`, optional): language of the news (default: {DEFAULT_NEWS_LANGUAGE})
+            count (:obj:`int`, optional): number of news to retrieve (default: {DEFAULT_NUM_NEWS_EXTRACTED})
 
         Returns:
-            :obj:`list` -- list of dictionaries containing information about the news retrieved
+            :obj:`list` of :obj:`dict`: list of dictionaries containing information about the news retrieved
         """
 
         # Empty list to store parsed news
@@ -128,10 +128,10 @@ def _format_title(self, title: str) -> str:
         Write title in the proper format
 
         Arguments:
-            title (:obj:`str`) -- title to be formatted
+            title (:obj:`str`): title to be formatted
 
         Returns:
-            :obj:`str` title with the proper format
+            :obj:`str`: title with the proper format
         """
 
         title_list = []
@@ -150,10 +150,10 @@ def _format_date(self, date_hour: str) -> str:
         Write date in the proper format
 
         Arguments:
-            date_hour (:obj:`str`) -- date to be formatted
+            date_hour (:obj:`str`): date to be formatted
 
         Returns:
-            :obj:`str` date with the proper format
+            :obj:`str`: date with the proper format
         """
 
         date_list = []
@@ -172,10 +172,10 @@ def __clean_data(self, text: str) -> str:
         Processes data and cleans it as entry for the vectorizer
 
         Arguments:
-            text (:obj:`str`) -- text preprocessed
+            text (:obj:`str`): text preprocessed
 
         Returns:
-            (:obj:`str`) -- cleaned and postprocessed text
+            :obj:`str`: cleaned and postprocessed text
         """
         if text is not None:
             # Remove punctuation
diff --git a/soa-api/src/soa/models/sentiment_model.py b/soa-api/src/soa/models/sentiment_model.py
index f5f0423..aed1b0d 100644
--- a/soa-api/src/soa/models/sentiment_model.py
+++ b/soa-api/src/soa/models/sentiment_model.py
@@ -19,9 +19,10 @@ def clean_analyzer(self, text: str):
         so that it only contains words and spaces between them.
 
         Args:
-            text: This is the string that contains the tweet received from Twitter
+            text (:obj:`str`): string containing the tweet received from Twitter
+
         Returns:
-            string: postcleaning text string (only words and spaces)
+            :obj:`str`: cleaned text string (only words and spaces)
         """
 
         # IMPORTANT: in Spanish sentiment analysis is important to keep ortographic accents
@@ -82,13 +83,13 @@ def analyze(self, text: str, clean_text: bool = False) -> Dict:
         """Sentiment analyzer of text
 
         Arguments:
-            text {str} -- sentence to analyze
+            text (:obj:`str`): sentence to analyze
 
         Keyword Arguments:
-            clean_text {bool} -- flag indicating if the text must be cleaned or not (default: {False})
+            clean_text (:obj:`bool`, optional): flag indicating if the text must be cleaned or not (default: {False})
 
         Returns:
-            str -- sentiment result of the analyzer
+            :obj:`str`: sentiment result of the analyzer
         """
 
         sentiment_result = ""
diff --git a/soa-api/src/soa/models/topic_modelling_model.py b/soa-api/src/soa/models/topic_modelling_model.py
index 0ff0976..5719d22 100644
--- a/soa-api/src/soa/models/topic_modelling_model.py
+++ b/soa-api/src/soa/models/topic_modelling_model.py
@@ -22,7 +22,7 @@ def __init__(self, num_topics: int = NUM_TOPICS):
         Extracts topics from a text using LDA (Latent Diriechit Allocation) to get themes from main kewyords in multiple documents
 
         Keyword Arguments:
-            num_topics (:obj:`int`, optional) -- number of topics to extract from text (default: {NUM_TOPICS})
+            num_topics (:obj:`int`, optional): number of topics to extract from text (default: {NUM_TOPICS})
         """
         # Create model component handler
         self._lda = LDA(n_components=num_topics)
@@ -35,10 +35,10 @@ def __clean_data(self, text: str) -> str:
         Processes data and cleans it as entry for the vectorizer
 
         Arguments:
-            text (:obj:`str`) -- text preprocessed
+            text (:obj:`str`): text preprocessed
 
         Returns:
-            (:obj:`str`) -- cleaned and postprocessed text
+            :obj:`str`: cleaned and postprocessed text
         """
         # Remove punctuation
         text_processed = re.sub('[,\.!?]', '', text)
@@ -53,7 +53,7 @@ def __vectorize_data(self, text: str):
         Transforms text into a vector of 0s and 1s as entry of the LDA model.
 
         Arguments:
-            text (:obj:`str`) -- text cleaned and postprocessed
+            text (:obj:`str`): text cleaned and postprocessed
         """
         # Fit and transform the processed titles
         vec_data = self._count_vectorizer.fit_transform([text])
@@ -65,8 +65,8 @@ def get_topics(self, text: str, words_per_topic: int = NUM_WORDS) -> dict:
         Returns the number of topics predicted in the text given.
 
         Arguments:
-            text (:obj:`str`) -- [description]
-            words_per_topic (:obj:`int`, optional) -- [description] (default: {NUM_WORDS})
+            text (:obj:`str`): text to get topics from
+            words_per_topic (:obj:`int`, optional): number of words to extract per topic (default: {NUM_WORDS})
 
         Return:
             :obj:`dict`: structure containing the list of topics extracted from a text given
@@ -95,9 +95,9 @@ def __summary_topics(self, model, vectorizer, n_top_words: int) -> list:
         Gets number of topics predicted and stores them into a list
 
         Arguments:
-            model -- pretrained LDA model
-            vectorizer -- pretrained Count Vectorizer vectorizer
-            n_top_words (:obj:`int`, optional) -- number of words to analyze from each topic
+            model: pretrained LDA model
+            vectorizer: pretrained CountVectorizer instance
+            n_top_words (:obj:`int`, optional): number of words to analyze from each topic
 
         Returns:
             :obj:`list` of :obj:`str`: list of topics extracted
diff --git a/soa-api/src/soa/models/twitter_model.py b/soa-api/src/soa/models/twitter_model.py
index 6312efa..737ea75 100644
--- a/soa-api/src/soa/models/twitter_model.py
+++ b/soa-api/src/soa/models/twitter_model.py
@@ -35,16 +35,16 @@ def get_tweets_single_query(self,
         """Retrieve tweets containing a keyword given in a query
 
         Arguments:
-            query {str} -- keyword to find in tweets
+            query (:obj:`str`): keyword to find in tweets
 
         Keyword Arguments:
-            count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
-            lang {str} -- language ot the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
-            start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
+            lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
+            start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
 
         Returns:
-            :obj:`list` -- list of dictionaries containing information about the tweets retrieved
+            :obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
         """
 
         # Empty list to store parsed tweets
@@ -94,17 +94,17 @@ def get_tweets_multiple_query(self,
         """Retrieve tweets containing a keyword given in a query
 
         Arguments:
-            query {List[str]} -- list to find in tweets
+            query (:obj:`list` of :obj:`str`): list of keywords to find in tweets
 
         Keyword Arguments:
-            count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
-            lang {str} -- language ot the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
-            start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            include_both {bool} -- flag indicating if the tweets to retrieve will only contain all the keywords in the query or not (default: {False})
+            count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
+            lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
+            start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            include_both (:obj:`bool`, optional): flag indicating whether the retrieved tweets must contain all the keywords in the query (default: {False})
 
         Returns:
-            :obj:`list` -- list of dictionaries containing information about the tweets retrieved
+            :obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
         """
         multiple_query = ""
         separator = " OR "
@@ -138,16 +138,16 @@ def get_tweets_with_bearer_token(self,
         Twitter API to retrieve more tweets and more info
 
         Arguments:
-            query {str} -- list to find in tweets
+            query (:obj:`str`): keyword to find in tweets
 
         Keyword Arguments:
-            count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
-            lang {str} -- language ot the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
-            start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
+            lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
+            start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
 
         Returns:
-            :obj:`list` -- list of dictionaries containing information about the tweets retrieved
+            :obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
         """
 
         tweets = []
@@ -201,17 +201,17 @@ def get_tweets_multiple_with_bearer_token(self,
         Twitter API to retrieve more tweets and more info
 
         Arguments:
-            query {List[str]} -- list to find in tweets
+            query (:obj:`list` of :obj:`str`): list of keywords to find in tweets
 
         Keyword Arguments:
-            count {int} -- number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
-            lang {str} -- language ot the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
-            start_date {str} -- beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            end_date {str} -- end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
-            include_both {bool} -- flag indicating if the tweets to retrieve will only contain all the keywords in the query or not (default: {False})
+            count (:obj:`int`, optional): number of tweets to retrieve (default: {DEFAULT_NUM_TWEETS_EXTRACTED})
+            lang (:obj:`str`, optional): language of the tweets (default: {DEFAULT_TWEETS_LANGUAGE})
+            start_date (:obj:`str`, optional): beginning date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            end_date (:obj:`str`, optional): end date point to retrieve tweets (default: {datetime.date.today().strftime('%Y-%m-%d')})
+            include_both (:obj:`bool`, optional): flag indicating whether the retrieved tweets must contain all the keywords in the query (default: {False})
 
         Returns:
-            :obj:`list` -- list of dictionaries containing information about the tweets retrieved
+            :obj:`list` of :obj:`dict`: list of dictionaries containing information about the tweets retrieved
         """
 
         multiple_query = ""
@@ -240,13 +240,13 @@ def extract_text(self, obj: dict, clean_text: bool = False) -> str:
         """Extracts text from tweet object status
 
         Arguments:
-            obj {dict} -- status with information about a tweet from Twitter API
+            obj (:obj:`dict`): status with information about a tweet from Twitter API
 
         Keyword Arguments:
-            clean_text {bool} -- flag indicating if the text of the tweet must be processed or not (default: {False})
+            clean_text (:obj:`bool`, optional): flag indicating if the text of the tweet must be processed or not (default: {False})
 
         Returns:
-            str -- text of the tweet
+            :obj:`str`: text of the tweet
         """
         text = obj.full_text.encode('utf-8').decode('utf-8')
         if clean_text:
@@ -257,10 +257,10 @@ def extract_url(self, obj: dict) -> str:
         """Extracts text from tweet object status
 
         Arguments:
-            obj {dict} -- status with information about a tweet from Twitter API
+            obj (:obj:`dict`): status with information about a tweet from Twitter API
 
         Returns:
-            str -- url of the tweet
+            :obj:`str`: URL of the tweet
         """
         url = "https://twitter.com/twitter/statuses/" + str(obj.id)
         return url
@@ -269,10 +269,10 @@ def extract_date_of_creation(self, obj: dict) -> str:
         """Extracts date and time from tweet object status
 
         Arguments:
-            obj {dict} -- status with information about a tweet from Twitter API
+            obj (:obj:`dict`): status with information about a tweet from Twitter API
 
         Returns:
-            str -- tweet´s publishing date and time
+            :obj:`str`: tweet's publishing date and time
         """
 
         return obj.created_at.strftime('%Y-%m-%dT%H:%M:%S')
@@ -280,10 +280,10 @@ def extract_geolocation(self, obj: dict) -> str:
         """Extracts country and city information from tweet object status
 
         Arguments:
-            obj {dict} -- status with information about a tweet from Twitter API
+            obj (:obj:`dict`): status with information about a tweet from Twitter API
 
         Returns:
-            str -- tweet´s location
+            :obj:`str`: tweet's location
         """
 
         return obj.geo
@@ -291,9 +291,9 @@ def extract_coordinates(self, obj: dict) -> str:
         """Extracts location coordinates from tweet object status
 
         Arguments:
-            obj {dict} -- status with information about a tweet from Twitter API
+            obj (:obj:`dict`): status with information about a tweet from Twitter API
 
         Returns:
-            str -- tweet´s coordinates in (lat - long) format
+            :obj:`str`: tweet's coordinates in (lat - long) format
         """
         return obj.coordinates
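
For reference, a minimal standalone sketch of the corrected _format_response mapping in covid_model_eu.py, with distinct 'cases' and 'deaths' keys so the second value no longer overwrites the first. The sample record is hypothetical; the field names mirror the ECDC columns used in the hunk above.

import datetime

def format_response(response: list) -> list:
    # Replace underscores and parentheses in country names with spaces
    transform_country = lambda x: ' '.join(x.split('_')).replace('(', ' ').replace(')', ' ')
    return [{
        'date': r['dateRep'].isoformat(),
        'country': transform_country(r['countriesAndTerritories']),
        'cases': r['cases_weekly'],
        'deaths': r['deaths_weekly']   # distinct key; a duplicate 'cases' key would silently drop the first value
    } for r in response]

# Hypothetical sample record for illustration only
sample = [{'dateRep': datetime.date(2020, 12, 14),
           'countriesAndTerritories': 'United_States_of_America',
           'cases_weekly': 1500000,
           'deaths_weekly': 17000}]
print(format_response(sample))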
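
The clean_analyzer docstring above describes stripping a tweet down to words and spaces while keeping orthographic accents, which matter for Spanish sentiment analysis. A hedged sketch of that kind of cleaning; the regex is illustrative and not taken from the module.

import re

def clean_analyzer(text: str) -> str:
    # Drop everything except word characters and whitespace; str patterns in
    # Python 3 are Unicode-aware, so accented letters are preserved
    text = re.sub(r'[^\w\s]', '', text)
    # Collapse runs of whitespace into single spaces
    return re.sub(r'\s+', ' ', text).strip()

print(clean_analyzer("¡Qué día tan bonito! #sol @amigos :)"))  # -> "Qué día tan bonito sol amigos"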
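
topic_modelling_model.py documents an LDA-plus-CountVectorizer pipeline whose __summary_topics step lists the top words of each topic. A rough standalone sketch of that pattern with scikit-learn (assuming a version that provides get_feature_names_out); the names and parameters here are illustrative, not the module's actual ones.

from sklearn.decomposition import LatentDirichletAllocation as LDA
from sklearn.feature_extraction.text import CountVectorizer

def summary_topics(model, vectorizer, n_top_words: int) -> list:
    words = vectorizer.get_feature_names_out()
    topics = []
    for topic in model.components_:
        # Indices of the n_top_words highest-weighted words in this topic
        top = topic.argsort()[:-n_top_words - 1:-1]
        topics.append(' '.join(words[i] for i in top))
    return topics

vectorizer = CountVectorizer(stop_words='english')
vec_data = vectorizer.fit_transform(["covid vaccines roll out while christmas travel raises covid cases"])
lda = LDA(n_components=1)
lda.fit(vec_data)
print(summary_topics(lda, vectorizer, n_top_words=3))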
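
get_tweets_multiple_query joins several keywords into one query string with the " OR " separator shown above, and its include_both flag is documented as requiring all keywords. A small sketch of how such a query string could be assembled; the " AND " branch is an assumption, not code from the module.

from typing import List

def build_multiple_query(query: List[str], include_both: bool = False) -> str:
    # Require every keyword when include_both is set, otherwise match any of them
    separator = " AND " if include_both else " OR "
    return separator.join(query)

print(build_multiple_query(["covid", "christmas", "famous"]))                     # covid OR christmas OR famous
print(build_multiple_query(["covid", "christmas", "famous"], include_both=True))  # covid AND christmas AND famous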