Skip to content

Commit

Permalink
Merge pull request #27 from patrys/fix-normalization-and-improve-vali…
Browse files Browse the repository at this point in the history
…dation

Fix city area normalization and improve validation rules
  • Loading branch information
mociepka authored Nov 15, 2017
2 parents 2169370 + 311eb0f commit 3a20cc1
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 26 deletions.
78 changes: 52 additions & 26 deletions i18naddress/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from __future__ import unicode_literals

from collections import namedtuple
import json

import io
import json
import os
import re
from collections import defaultdict, namedtuple

VALID_COUNTRY_CODE = re.compile(r'^\w{2,3}$')
VALIDATION_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
Expand Down Expand Up @@ -80,6 +79,16 @@ def _make_choices(rules, translated=False):
return choices


def _compact_choices(choices, keys):
values = defaultdict(set)
for key, value in choices:
values[key].add(value)
return [
(key, value)
for key in keys
for value in sorted(values[key])]


def _match_choices(value, choices):
if value:
value = value.strip().lower()
Expand Down Expand Up @@ -133,14 +142,20 @@ def get_validation_rules(address):
postal_code_type = country_data['zip_name_type']
postal_code_prefix = country_data.get('postprefix', '')
# second level of data is for administrative areas
country_area_choices = _make_choices(country_data)
for language in languages:
localized_country_data = database['%s--%s' % (
country_code, language)]
country_area_choices += _make_choices(
localized_country_data, translated=True)
country_area = _match_choices(
address.get('country_area'), country_area_choices)
country_area = None
country_area_choices = []
if 'sub_keys' in country_data:
country_area_keys = country_data['sub_keys'].split('~')
country_area_choices = _make_choices(country_data)
for language in languages:
localized_country_data = database[
'%s--%s' % (country_code, language)]
country_area_choices += _make_choices(
localized_country_data, translated=True)
country_area_choices = _compact_choices(
country_area_choices, country_area_keys)
country_area = _match_choices(
address.get('country_area'), country_area_choices)
if country_area:
# third level of data is for cities
country_area_data = database['%s/%s' % (
Expand All @@ -150,14 +165,20 @@ def get_validation_rules(address):
re.compile('^' + country_area_data['zip']))
if 'zipex' in country_area_data:
postal_code_examples = country_area_data['zipex']
city_choices = _make_choices(country_area_data)
for language in languages:
localized_country_area_data = database['%s/%s--%s' % (
country_code, country_area, language)]
city_choices += _make_choices(
localized_country_area_data, translated=True)
city = _match_choices(
address.get('city'), city_choices)
city = None
city_choices = []
if 'sub_keys' in country_area_data:
city_keys = country_area_data['sub_keys'].split('~')
city_choices = _make_choices(country_area_data)
for language in languages:
localized_country_area_data = database['%s/%s--%s' % (
country_code, country_area, language)]
city_choices += _make_choices(
localized_country_area_data, translated=True)
city_choices = _compact_choices(
city_choices, city_keys)
city = _match_choices(
address.get('city'), city_choices)
if city:
# fourth level of data is for dependent sublocalities
city_data = database['%s/%s/%s' % (
Expand All @@ -167,12 +188,17 @@ def get_validation_rules(address):
re.compile('^' + city_data['zip']))
if 'zipex' in city_data:
postal_code_examples = city_data['zipex']
city_area_choices = _make_choices(city_data)
for language in languages:
localized_city_data = database['%s/%s/%s--%s' % (
country_code, country_area, city, language)]
city_area_choices += _make_choices(
localized_city_data, translated=True)
city_area_choices = []
if 'sub_keys' in city_data:
city_area_keys = city_data['sub_keys'].split('~')
city_area_choices = _make_choices(city_data)
for language in languages:
localized_city_data = database['%s/%s/%s--%s' % (
country_code, country_area, city, language)]
city_area_choices += _make_choices(
localized_city_data, translated=True)
city_area_choices = _compact_choices(
city_area_choices, city_area_keys)
return ValidationRules(
country_name,
address_format, address_latin_format,
Expand Down Expand Up @@ -323,7 +349,7 @@ def latinize_address(address, normalized=False):
country_code, country_area, city, city_area)
city_area_data = database.get(key)
if city_area_data:
cleaned_data['city_area'] = city_data.get(
cleaned_data['city_area'] = city_area_data.get(
'lname',
city_area_data.get('name', city_area))
return cleaned_data
25 changes: 25 additions & 0 deletions tests/test_i18naddress.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,31 @@ def test_dictionary_access():
assert state['name'] == 'Nevada'


def test_validation_rules_canada():
    """Check the exact country-area choices returned for Canada (``CA``)."""
    # Each area code is paired with every distinct name it is known by;
    # entries sharing a code sit next to each other.
    expected_choices = [
        ('AB', 'Alberta'),
        ('BC', 'British Columbia'),
        ('BC', 'Colombie-Britannique'),
        ('MB', 'Manitoba'),
        ('NB', 'New Brunswick'),
        ('NB', 'Nouveau-Brunswick'),
        ('NL', 'Newfoundland and Labrador'),
        ('NL', 'Terre-Neuve-et-Labrador'),
        ('NT', 'Northwest Territories'),
        ('NT', 'Territoires du Nord-Ouest'),
        ('NS', 'Nouvelle-Écosse'),
        ('NS', 'Nova Scotia'),
        ('NU', 'Nunavut'),
        ('ON', 'Ontario'),
        ('PE', 'Prince Edward Island'),
        ('PE', 'Île-du-Prince-Édouard'),
        ('QC', 'Quebec'),
        ('QC', 'Québec'),
        ('SK', 'Saskatchewan'),
        ('YT', 'Yukon'),
    ]
    rules = get_validation_rules({'country_code': 'CA'})
    assert rules.country_area_choices == expected_choices


def test_validation_rules_switzerland():
validation_data = get_validation_rules({'country_code': 'CH'})
assert validation_data.allowed_fields == {
Expand Down
16 changes: 16 additions & 0 deletions tests/test_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,22 @@ def test_address_latinization():
'street_address': '1600 Charleston Rd.'}
address = latinize_address(address)
assert address['country_area'] == 'California'
address = {
'country_code': 'CN',
'country_area': '云南省',
'postal_code': '677400',
'city': '临沧市',
'city_area': '凤庆县',
'street_address': '中关村东路1号'}
address = latinize_address(address)
assert address == {
'country_code': 'CN',
'country_area': 'Yunnan Sheng',
'postal_code': '677400',
'city': 'Lincang Shi',
'city_area': 'Fengqing Xian',
'street_address': '中关村东路1号',
'sorting_code': ''}
address = {
'name': 'Zhang San',
'company_name': 'Beijing Kid Toy Company',
Expand Down

0 comments on commit 3a20cc1

Please sign in to comment.