Skip to content

Commit

Permalink
Merge pull request #410 from ror-community/schema-v2-1
Browse files Browse the repository at this point in the history
Schema v2.1 implementation
  • Loading branch information
lizkrznarich authored Dec 3, 2024
2 parents 3e03b8c + 6032b12 commit 27d5352
Show file tree
Hide file tree
Showing 30 changed files with 1,096 additions and 161 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ boto3
pandas==1.4.1
numpy==1.22
titlecase==2.3
update_address @ git+https://github.com/ror-community/update_address.git@v2-locations
update_address @ git+https://github.com/ror-community/update_address.git@v2-1-locations
launchdarkly-server-sdk==7.6.1
jsonschema==3.2.0
python-magic
Expand Down
2 changes: 1 addition & 1 deletion rorapi/common/create_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from rorapi.management.commands.generaterorid import check_ror_id

V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/master/ror_schema_v2_0.json")
V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/schema-v2-1/ror_schema_v2_1.json")


def update_record(json_input, existing_record):
Expand Down
7 changes: 5 additions & 2 deletions rorapi/common/csv_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def process_csv(csv_file, version, validate_only):
success_msg = None
error = None
report = []
report_fields = ['row', 'ror_id', 'action', 'errors']
report_fields = ['row', 'issue_url', 'ror_id', 'action', 'errors']
skipped_count = 0
updated_count = 0
new_count = 0
Expand All @@ -57,10 +57,13 @@ def process_csv(csv_file, version, validate_only):
reader = csv.DictReader(io.StringIO(read_file))
row_num = 2
for row in reader:
html_url = None
ror_id = None
updated = False
print("Row data")
print(row)
if row['html_url']:
html_url = row['html_url']
if row['id']:
ror_id = row['id']
updated = True
Expand All @@ -86,7 +89,7 @@ def process_csv(csv_file, version, validate_only):
skipped_count += 1
if validate_only and action == 'created':
ror_id = None
report.append({"row": row_num, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
report.append({"row": row_num, "html_url": html_url, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
row_num += 1
if new_count > 0 or updated_count > 0 or skipped_count > 0:
try:
Expand Down
9 changes: 0 additions & 9 deletions rorapi/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,6 @@ def __init__(self, data):
self.count = data.doc_count


class Aggregations:
    """Wraps Elasticsearch aggregation results as bucket-model lists.

    Builds typed bucket lists (types, countries, statuses) from the
    corresponding aggregation buckets on the supplied ES response object.
    """

    def __init__(self, data):
        # Each attribute mirrors one named aggregation on the ES response.
        self.types = [TypeBucket(bucket) for bucket in data.types.buckets]
        self.countries = [CountryBucket(bucket) for bucket in data.countries.buckets]
        self.statuses = [StatusBucket(bucket) for bucket in data.statuses.buckets]


class Errors:
"""Errors model class"""

Expand Down
23 changes: 0 additions & 23 deletions rorapi/common/parsers.py

This file was deleted.

77 changes: 49 additions & 28 deletions rorapi/common/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

from urllib.parse import unquote

ALLOWED_FILTERS = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status")
ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status")
ALLOWED_FILTERS_V2 = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status", "locations.geonames_details.continent_code", "locations.geonames_details.continent_name")
ALLOWED_PARAM_KEYS = ("query", "page", "filter", "query.advanced", "all_status")
ALLOWED_ALL_STATUS_VALUES = ("", "true", "false")
# includes deprecated ext id types
Expand Down Expand Up @@ -99,11 +100,15 @@
"links.type",
"links.value",
"locations.geonames_id",
"locations.geonames_details.name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.continent_code",
"locations.geonames_details.continent_name",
"locations.geonames_details.country_code",
"locations.geonames_details.country_name",
"locations.geonames_details.country_subdivision_code",
"locations.geonames_details.country_subdivision_name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.name",
"names.lang",
"names.types",
"names.value",
Expand Down Expand Up @@ -150,34 +155,46 @@ def check_status_adv_q(adv_q_string):
status_in_q = True
return status_in_q

def get_country_name_filters(country_name_field, filter_string, version):
    """Extract country-name filter terms from a raw filter string.

    Country names may themselves contain commas (the filter separator),
    e.g. ``country.country_name:Bonaire, Sint Eustatius and Saba,types:Company``,
    so they cannot be split naively on ",". This scans `filter_string` for
    values following `country_name_field` and trims trailing fragments that
    actually belong to the next filter.

    :param country_name_field: filter key to search for
        (e.g. "country.country_name" or "locations.geonames_details.country_name")
    :param filter_string: the raw, comma-separated filter query string
    :param version: API version; "v1" selects ALLOWED_FILTERS_V1,
        anything else selects ALLOWED_FILTERS_V2
    :return: list of "field:value" strings, one per matched country-name filter
    """
    country_name_filters = []
    if version == "v1":
        allowed_filters = ALLOWED_FILTERS_V1
    else:
        allowed_filters = ALLOWED_FILTERS_V2
    # Capture everything after "field:" up to the next colon; because the
    # value may contain commas, the capture can overrun into the next filter
    # and is trimmed below.
    search = re.findall(country_name_field + ":([^:]*)", filter_string)
    if search:
        for s in search:
            # More than one comma implies the capture swallowed the start of
            # the next filter ("Name, Suffix,nextkey") — drop the last segment.
            # NOTE(review): a value with exactly one comma and a following
            # filter key relies solely on the allowed-filter trim below.
            if len(re.findall(",", s)) > 1:
                s = s.rsplit(",", 1)[0]
            # Strip any trailing ",<allowed filter key>" fragment from the value.
            for allowed_filter in allowed_filters:
                if allowed_filter in s:
                    s = s.rsplit("," + allowed_filter, 1)[0]
            country_name_filter = country_name_field + ":" + s
            # NOTE(review): this reassignment only rebinds the local string;
            # the cleaned filter_string is never returned, so the caller's
            # copy still contains the country-name filters — confirm the
            # caller strips them itself before splitting on ",".
            filter_string = filter_string.replace(country_name_filter, "")
            country_name_filters.append(country_name_filter)
    return country_name_filters


def filter_string_to_list(filter_string, version):
filter_list = []
if "country.country_code" in filter_string and version == "v2":
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if version == "v2":
if "country.country_code" in filter_string:
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if "country.country_name" in filter_string:
filter_string = filter_string.replace(
"country.country_name", "locations.geonames_details.country_name"
)
# some country names contain comma chars
# allow comma chars in country_name filter values only
# country.country_name:Germany,types:Company
if "country.country_name" in filter_string:
country_name_filters = []
search = re.findall("country.country_name:([^:]*)", filter_string)
if search:
for s in search:
if len(re.findall(",", s)) > 1:
s = s.rsplit(",", 1)[0]
for allowed_filter in ALLOWED_FILTERS:
if allowed_filter in s:
s = s.rsplit("," + allowed_filter, 1)[0]
country_name_filter = "country.country_name:" + s
v2_country_name_filter = "locations.geonames_details.country_name:" + s
filter_string = filter_string.replace(country_name_filter, "")
if version == "v2":
country_name_filters.append(v2_country_name_filter)
else:
country_name_filters.append(country_name_filter)

if version == "v1":
country_name_field = "country.country_name"
else:
country_name_field = "locations.geonames_details.country_code"
if country_name_field in filter_string:
country_name_filters = get_country_name_filters(country_name_field, filter_string, version)
filter_list = [f for f in filter_string.split(",") if f]
filter_list = filter_list + country_name_filters
else:
Expand Down Expand Up @@ -240,7 +257,11 @@ def validate(params, version):

valid_filters = [f for f in filters if ":" in f]
filter_keys = [f.split(":")[0] for f in valid_filters]
illegal_keys = [v for v in filter_keys if v not in ALLOWED_FILTERS]
if version == "v1":
allowed_filters = ALLOWED_FILTERS_V1
else:
allowed_filters = ALLOWED_FILTERS_V2
illegal_keys = [v for v in filter_keys if v not in allowed_filters]
errors.extend(["filter key '{}' is illegal".format(k) for k in illegal_keys])

if "page" in params:
Expand Down Expand Up @@ -323,6 +344,7 @@ def build_search_query(params, version):
[
("types", "types"),
("countries", "locations.geonames_details.country_code"),
("continents", "locations.geonames_details.continent_code"),
("statuses", "status"),
]
)
Expand All @@ -337,7 +359,6 @@ def build_search_query(params, version):

sort_field = params.get("sort", "id")
sort_order = params.get("order", "asc")
qb.add_sort(sort_field, sort_order)

qb.paginate(int(params.get("page", 1)))
return qb.get_query()
Expand Down
6 changes: 0 additions & 6 deletions rorapi/common/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,5 @@ class BucketSerializer(serializers.Serializer):
count = serializers.IntegerField()


class AggregationsSerializer(serializers.Serializer):
    """Serializes an Aggregations model: bucket lists for the faceted
    search response (organization types, countries, statuses)."""

    # Each facet is a list of {id/title, count}-style buckets.
    types = BucketSerializer(many=True)
    countries = BucketSerializer(many=True)
    statuses = BucketSerializer(many=True)


class ErrorsSerializer(serializers.Serializer):
errors = serializers.StringRelatedField(many=True)
6 changes: 3 additions & 3 deletions rorapi/tests/tests_integration/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ def test_retrieval(self):

def test_query_grid_retrieval(self):
for test_org in requests.get(BASE_URL).json()['items']:
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)

def test_error(self):
output = requests.get(BASE_URL, {
Expand Down
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(
Expand Down
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(
Expand Down
Loading

0 comments on commit 27d5352

Please sign in to comment.