Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Match dict instead of hardcoding trim_chars #458

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 22 additions & 65 deletions pytrends/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime, timedelta

import pandas as pd
import re
import requests

from pandas.io.json._normalize import nested_to_record
Expand Down Expand Up @@ -106,7 +107,7 @@ def GetNewProxy(self):
else:
self.proxy_index = 0

def _get_data(self, url, method=GET_METHOD, trim_chars=0, **kwargs):
def _get_data(self, url, method=GET_METHOD, **kwargs):
"""Send a request to Google and return the JSON response as a Python object
:param url: the url to which the request will be sent
:param method: the HTTP method ('get' or 'post')
Expand Down Expand Up @@ -147,7 +148,8 @@ def _get_data(self, url, method=GET_METHOD, trim_chars=0, **kwargs):
# trim initial characters
# some responses start with garbage characters, like ")]}',"
# these have to be cleaned before being passed to the json parser
content = response.text[trim_chars:]
content_match = re.search(r'{.+}', response.text)
content = content_match.group(0)
# parse json
self.GetNewProxy()
return json.loads(content)
Expand Down Expand Up @@ -185,12 +187,7 @@ def build_payload(self, kw_list, cat=0, timeframe='today 5-y', geo='',
def _tokens(self):
"""Makes request to Google to get API tokens for interest over time, interest by region and related queries"""
# make the request and parse the returned json
widget_dict = self._get_data(
url=TrendReq.GENERAL_URL,
method=TrendReq.GET_METHOD,
params=self.token_payload,
trim_chars=4,
)['widgets']
widget_dict = self._get_data(url=TrendReq.GENERAL_URL, method=TrendReq.GET_METHOD, params=self.token_payload)['widgets']
# order of the json matters...
first_region_token = True
# clear self.related_queries_widget_list and self.related_topics_widget_list
Expand Down Expand Up @@ -222,12 +219,8 @@ def interest_over_time(self):
}

# make the request and parse the returned json
req_json = self._get_data(
url=TrendReq.INTEREST_OVER_TIME_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=over_time_payload,
)
req_json = self._get_data(url=TrendReq.INTEREST_OVER_TIME_URL, method=TrendReq.GET_METHOD,
params=over_time_payload)

df = pd.DataFrame(req_json['default']['timelineData'])
if (df.empty):
Expand Down Expand Up @@ -287,12 +280,8 @@ def interest_by_region(self, resolution='COUNTRY', inc_low_vol=False,
region_payload['tz'] = self.tz

# parse returned json
req_json = self._get_data(
url=TrendReq.INTEREST_BY_REGION_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=region_payload,
)
req_json = self._get_data(url=TrendReq.INTEREST_BY_REGION_URL, method=TrendReq.GET_METHOD,
params=region_payload)
df = pd.DataFrame(req_json['default']['geoMapData'])
if (df.empty):
return df
Expand Down Expand Up @@ -332,12 +321,8 @@ def related_topics(self):
related_payload['tz'] = self.tz

# parse the returned json
req_json = self._get_data(
url=TrendReq.RELATED_QUERIES_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=related_payload,
)
req_json = self._get_data(url=TrendReq.RELATED_QUERIES_URL, method=TrendReq.GET_METHOD,
params=related_payload)

# top topics
try:
Expand Down Expand Up @@ -381,12 +366,8 @@ def related_queries(self):
related_payload['tz'] = self.tz

# parse the returned json
req_json = self._get_data(
url=TrendReq.RELATED_QUERIES_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=related_payload,
)
req_json = self._get_data(url=TrendReq.RELATED_QUERIES_URL, method=TrendReq.GET_METHOD,
params=related_payload)

# top queries
try:
Expand Down Expand Up @@ -415,24 +396,15 @@ def trending_searches(self, pn='united_states'):
# make the request
# forms become obsolete due to the new TRENDING_SEARCHES_URL
# forms = {'ajax': 1, 'pn': pn, 'htd': '', 'htv': 'l'}
req_json = self._get_data(
url=TrendReq.TRENDING_SEARCHES_URL,
method=TrendReq.GET_METHOD,
**self.requests_args
)[pn]
req_json = self._get_data(url=TrendReq.TRENDING_SEARCHES_URL, method=TrendReq.GET_METHOD, **self.requests_args)[pn]
result_df = pd.DataFrame(req_json)
return result_df

def today_searches(self, pn='US'):
"""Request data from Google Daily Trends section and returns a dataframe"""
forms = {'ns': 15, 'geo': pn, 'tz': '-180', 'hl': 'en-US'}
req_json = self._get_data(
url=TrendReq.TODAY_SEARCHES_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=forms,
**self.requests_args
)['default']['trendingSearchesDays'][0]['trendingSearches']
req_json = self._get_data(url=TrendReq.TODAY_SEARCHES_URL, method=TrendReq.GET_METHOD, params=forms,
**self.requests_args)['default']['trendingSearchesDays'][0]['trendingSearches']
result_df = pd.DataFrame()
# parse the returned json
sub_df = pd.DataFrame()
Expand All @@ -455,13 +427,8 @@ def top_charts(self, date, hl='en-US', tz=300, geo='GLOBAL'):
'isMobile': False}

# make the request and parse the returned json
req_json = self._get_data(
url=TrendReq.TOP_CHARTS_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=chart_payload,
**self.requests_args
)
req_json = self._get_data(url=TrendReq.TOP_CHARTS_URL, method=TrendReq.GET_METHOD, params=chart_payload,
**self.requests_args)
try:
df = pd.DataFrame(req_json['topCharts'][0]['listItems'])
except IndexError:
Expand All @@ -475,27 +442,17 @@ def suggestions(self, keyword):
kw_param = quote(keyword)
parameters = {'hl': self.hl}

req_json = self._get_data(
url=TrendReq.SUGGESTIONS_URL + kw_param,
params=parameters,
method=TrendReq.GET_METHOD,
trim_chars=5,
**self.requests_args
)['default']['topics']
req_json = self._get_data(url=TrendReq.SUGGESTIONS_URL + kw_param, method=TrendReq.GET_METHOD,
params=parameters, **self.requests_args)['default']['topics']
return req_json

def categories(self):
"""Request available categories data from Google's API and return a dictionary"""

params = {'hl': self.hl}

req_json = self._get_data(
url=TrendReq.CATEGORIES_URL,
params=params,
method=TrendReq.GET_METHOD,
trim_chars=5,
**self.requests_args
)
req_json = self._get_data(url=TrendReq.CATEGORIES_URL, method=TrendReq.GET_METHOD, params=params,
**self.requests_args)
return req_json

def get_historical_interest(self, keywords, year_start=2018, month_start=1,
Expand Down