
Commit

Merge branch 'master' into sv-pvmap
ajaits authored Jan 2, 2025
2 parents f3eb993 + 9177e6d commit 2243ed8
Showing 4 changed files with 34 additions and 22 deletions.
8 changes: 7 additions & 1 deletion scripts/us_census/pep/population_estimates_by_asr/process.py
@@ -273,13 +273,19 @@ def process(self):
final_df.drop(columns=['Unnamed: 0'], inplace=True)
final_df = final_df.dropna()
final_df['Year'] = final_df['Year'].astype(float).astype(int)
logging.info(f"Sorting data {final_df.shape} by year, geo-id")
final_df = final_df.sort_values(by=['Year', 'geo_ID'])
logging.info(f"Setting measurement method")
final_df = _measurement_method(final_df)
logging.info(
f"Writing data {final_df.shape} to {self._cleaned_csv_file_path}"
)
final_df.to_csv(self._cleaned_csv_file_path, index=False)
sv_list = list(set(sv_list))
logging.info(f"Generating MCF for {len(sv_list)}")
sv_list.sort()
logging.info(f"----Generating MCF and TMCF----")
self._generate_mcf(sv_list)
logging.info(f"Generating TMCF")
self._generate_tmcf()
else:
logging.fatal(
…
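A note on the sv_list handling added above: set() deduplicates but yields elements in arbitrary order, so the follow-up sort() is what keeps the generated MCF deterministic across runs. A minimal sketch with made-up stat-var names:

# Hypothetical stat-var names, for illustration only.
sv_list = ['Count_Person_Male', 'Count_Person', 'Count_Person_Male']
sv_list = list(set(sv_list))  # deduplicates; iteration order is arbitrary
sv_list.sort()  # deterministic order: ['Count_Person', 'Count_Person_Male']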
25 changes: 15 additions & 10 deletions tools/download_utils/requests_wrappers.py
@@ -21,7 +21,9 @@
import time


def request_url_json(url: str, max_retries: int = 3, retry_interval: int = 5) -> dict:
def request_url_json(url: str,
max_retries: int = 3,
retry_interval: int = 5) -> dict:
"""Get JSON object version of reponse to GET request to given URL.
Handles exception ReadTimeout.
Args:
@@ -43,15 +45,18 @@ def request_url_json(url: str, max_retries: int = 3, retry_interval: int = 5) ->
logging.error('HTTP status code: ' + str(req.status_code))
return response_data
except requests.exceptions.ReadTimeout:
if max_retries> 0:
logging.warning('Timeout occurred, retrying after 10s.')
time.sleep(10)
return request_url_json(url, max_retries - 1, retry_interval)
if max_retries > 0:
logging.warning('Timeout occurred, retrying after 10s.')
time.sleep(10)
return request_url_json(url, max_retries - 1, retry_interval)
else:
return {}
return {}
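As reformatted, request_url_json retries recursively on ReadTimeout, decrementing max_retries on each call and returning {} once retries are exhausted; note the retry path sleeps a fixed 10s rather than honoring retry_interval. A usage sketch (the URL is illustrative, and the import path assumes the repo layout maps directly to a Python package):

import logging

from tools.download_utils.requests_wrappers import request_url_json

# Illustrative endpoint; any GET endpoint returning JSON behaves the same.
data = request_url_json('https://example.com/api/items.json',
                        max_retries=3, retry_interval=5)
if not data:
    logging.error('GET failed or timed out after all retries.')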


def request_post_json(url: str, data_: dict, max_retries: int = 3, retry_interval: int = 5) -> dict:
def request_post_json(url: str,
data_: dict,
max_retries: int = 3,
retry_interval: int = 5) -> dict:
"""Get JSON object version of reponse to POST request to given URL.
Args:
@@ -73,17 +78,17 @@ def request_post_json(url: str, data_: dict, max_retries: int = 3, retry_interva
req = requests.post(url, data=json.dumps(data_), headers=headers)
logging.info('Post request url: %s', req.request.url)
except requests.exceptions.ConnectionError:
logging.warning(f'Timeout occurred, retrying after {retry_interval}s.')
logging.warning(
f'Timeout occurred, retrying after {retry_interval}s.')
time.sleep(retry_interval)
retry += 1
continue

if retry >= max_retries:
logging.warning('Max retries exceeded. Returning empty response')
logging.warning('Max retries exceeded. Returning empty response')
elif req.status_code == requests.codes.ok:
response_data = req.json()
else:
response_data = {'http_err_code': req.status_code}
logging.error('Error: HTTP status code: %s', str(req.status_code))
return response_data
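Unlike the GET wrapper, request_post_json retries in a loop on ConnectionError, and on a non-OK status it returns the code under the http_err_code key rather than an empty dict. A sketch under the same assumptions as above:

import logging

from tools.download_utils.requests_wrappers import request_post_json

# Illustrative URL and payload.
result = request_post_json('https://example.com/api/query',
                           {'ids': ['geoId/06']},
                           max_retries=3,
                           retry_interval=5)
if 'http_err_code' in result:
    logging.error('POST failed with HTTP %s', result['http_err_code'])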

12 changes: 6 additions & 6 deletions tools/statvar_importer/mcf_file_util.py
@@ -224,13 +224,13 @@ def add_pv_to_node(
value_list = get_value_list(value)
if value_list:
if len(value_list) == 1:
value = value_list[0]
value = value_list[0]
else:
# Add each value recursively.
for v in value_list:
add_pv_to_node(prop, v, node, append_value, strip_namespaces,
normalize)
return node
# Add each value recursively.
for v in value_list:
add_pv_to_node(prop, v, node, append_value,
strip_namespaces, normalize)
return node
# allow empty values
# if not value:
# return node
…
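The restructuring above is behavior-preserving: a single-element value list collapses to a scalar, while a multi-element list fans out through recursive add_pv_to_node calls and returns early. A sketch of the intended effect, assuming get_value_list splits a comma-separated MCF value into its elements and that the keyword names match the positional parameters seen in the diff:

# Hypothetical property and values, for illustration only.
node = {}
add_pv_to_node('memberOf', 'dcid:ClassA, dcid:ClassB', node,
               append_value=True, strip_namespaces=False, normalize=True)
# Each element is re-added through a recursive call, so node holds both
# values under 'memberOf' instead of the raw comma-joined string.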
11 changes: 6 additions & 5 deletions tools/statvar_importer/ngram_matcher.py
@@ -99,8 +99,8 @@ def lookup(
"""
normalized_key = self._normalize_string(key)
ngrams = self._get_ngrams(normalized_key)
logging.level_debug() and logging.log(2,
f'looking up ngrams {ngrams} for {key}')
logging.level_debug() and logging.log(
2, f'looking up ngrams {ngrams} for {key}')
lookup_config = self._config
if config:
# Use the match config passed in.
@@ -129,7 +129,8 @@ def lookup(
key_match['ngram_pos'] = min(key_match['ngram_pos'],
ngram_pos)

logging.level_debug() and logging.log(2, f'Matches for {key}: {matches}')
logging.level_debug() and logging.log(2,
f'Matches for {key}: {matches}')
# Collect all key indices that match, with counts.
match_indices = list()
min_matches = max(
@@ -144,8 +145,8 @@
match_indices.sort(
key=lambda x: self._get_ngram_match_score(x[1], key_len),
reverse=True)
logging.level_debug() and logging.log(2,
f'Sorted matches for {key}: {match_indices}')
logging.level_debug() and logging.log(
2, f'Sorted matches for {key}: {match_indices}')

# Collect results in sorted order
results = list()
…
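All three reflowed call sites use the logging.level_debug() and logging.log(2, ...) idiom: the and short-circuits, so the f-string argument is never built unless verbose logging is enabled. A standard-library sketch of the same pattern (assuming level_debug() here is the absl-style check for whether debug output is on):

import logging

def level_debug() -> bool:
    # Stand-in for the absl-style logging.level_debug() check.
    return logging.getLogger().isEnabledFor(logging.DEBUG)

key = 'population of california'
ngrams = ['pop', 'opu', 'pul']
# The f-string is only evaluated when debug logging is enabled.
level_debug() and logging.debug(f'looking up ngrams {ngrams} for {key}')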
