From f7e5c0c37c9883c28bae76d934fbe831c853e461 Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Thu, 14 Nov 2024 19:22:50 -0600 Subject: [PATCH 1/7] support indexing and returning records with v2.1 locations --- requirements.txt | 2 +- rorapi/common/create_update.py | 2 +- rorapi/common/parsers.py | 23 ----------------------- rorapi/common/queries.py | 10 +++++++--- rorapi/common/views.py | 2 +- rorapi/v2/index_template_es7.json | 24 ++++++++++++++++++------ rorapi/v2/models.py | 8 ++++++-- rorapi/v2/serializers.py | 4 ++++ 8 files changed, 38 insertions(+), 37 deletions(-) delete mode 100644 rorapi/common/parsers.py diff --git a/requirements.txt b/requirements.txt index 84b7ffe..5844ea8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ boto3 pandas==1.4.1 numpy==1.22 titlecase==2.3 -update_address @ git+https://github.com/ror-community/update_address.git@v2-locations +update_address @ git+https://github.com/ror-community/update_address.git@v2-1-locations launchdarkly-server-sdk==7.6.1 jsonschema==3.2.0 python-magic diff --git a/rorapi/common/create_update.py b/rorapi/common/create_update.py index 6f67be9..0bfc7cc 100644 --- a/rorapi/common/create_update.py +++ b/rorapi/common/create_update.py @@ -8,7 +8,7 @@ ) from rorapi.management.commands.generaterorid import check_ror_id -V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/master/ror_schema_v2_0.json") +V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/schema-v2-1/ror_schema_v2_1.json") def update_record(json_input, existing_record): diff --git a/rorapi/common/parsers.py b/rorapi/common/parsers.py deleted file mode 100644 index d00ed17..0000000 --- a/rorapi/common/parsers.py +++ /dev/null @@ -1,23 +0,0 @@ -import jsonschema -import requests -from rest_framework.exceptions import ParseError -from rest_framework.parsers import JSONParser - - -class JSONSchemaParser(JSONParser): - - def 
get_file_from_url(self, url): - rsp = requests.get(url) - rsp.raise_for_status() - return rsp.json() - - def parse(self, stream, media_type=None, parser_context=None): - schema = self.get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/master/ror_schema_v2_0.json") - data = super(JSONSchemaParser, self).parse(stream, media_type, - parser_context) - try: - jsonschema.validate(data, schema) - except jsonschema.ValidationError as error: - raise ParseError(detail=error.message) - else: - return data \ No newline at end of file diff --git a/rorapi/common/queries.py b/rorapi/common/queries.py index 23c47b7..a535564 100644 --- a/rorapi/common/queries.py +++ b/rorapi/common/queries.py @@ -99,11 +99,15 @@ "links.type", "links.value", "locations.geonames_id", - "locations.geonames_details.name", - "locations.geonames_details.lat", - "locations.geonames_details.lng", + "locations.geonames_details.continent_code", + "locations.geonames_details.continent_name", "locations.geonames_details.country_code", "locations.geonames_details.country_name", + "locations.geonames_details.country_subdivision_code", + "locations.geonames_details.country_subdivision_name", + "locations.geonames_details.lat", + "locations.geonames_details.lng", + "locations.geonames_details.name", "names.lang", "names.types", "names.value", diff --git a/rorapi/common/views.py b/rorapi/common/views.py index e163012..160ff74 100644 --- a/rorapi/common/views.py +++ b/rorapi/common/views.py @@ -184,7 +184,7 @@ def get(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): class GenerateAddress(APIView): - permission_classes = [OurTokenPermission] + #permission_classes = [OurTokenPermission] def get(self, request, geonamesid, version=REST_FRAMEWORK["DEFAULT_VERSION"]): if version == 'v2': diff --git a/rorapi/v2/index_template_es7.json b/rorapi/v2/index_template_es7.json index c2d29a4..b29075c 100644 --- a/rorapi/v2/index_template_es7.json +++ b/rorapi/v2/index_template_es7.json @@ 
-89,13 +89,10 @@ }, "geonames_details": { "properties": { - "lat": { - "type": "float" - }, - "lng": { - "type": "float" + "continent_code": { + "type": "keyword" }, - "name": { + "continent_name": { "type": "keyword" }, "country_code": { @@ -103,6 +100,21 @@ }, "country_name": { "type": "keyword" + }, + "country_subdivision_code": { + "type": "keyword" + }, + "country_subdivision_name": { + "type": "keyword" + }, + "lat": { + "type": "float" + }, + "lng": { + "type": "float" + }, + "name": { + "type": "keyword" } } } diff --git a/rorapi/v2/models.py b/rorapi/v2/models.py index 3a13e19..4c14917 100644 --- a/rorapi/v2/models.py +++ b/rorapi/v2/models.py @@ -6,11 +6,15 @@ class GeoNamesDetails: """A model class for storing geonames city hash""" def __init__(self, data): + self.continent_code = data.country_code + self.continent_name = data.country_name + self.country_code = data.country_code + self.country_name = data.country_name + self.country_subdivision_code = data.country_code + self.country_subdivision_name = data.country_name self.name = data.name self.lat = data.lat self.lng = data.lng - self.country_code = data.country_code - self.country_name = data.country_name class Location: diff --git a/rorapi/v2/serializers.py b/rorapi/v2/serializers.py index 07d7600..43e109d 100644 --- a/rorapi/v2/serializers.py +++ b/rorapi/v2/serializers.py @@ -33,8 +33,12 @@ class LinkSerializer(serializers.Serializer): class GeoNamesDetailsSerializer(serializers.Serializer): + continent_code = serializers.CharField() + continent_name = serializers.CharField() country_code = serializers.CharField() country_name = serializers.CharField() + country_subdivision_code = serializers.CharField() + country_subdivision_name = serializers.CharField() lat = serializers.DecimalField( max_digits=None, decimal_places=10, coerce_to_string=False ) From b14056ca66eda3f5ab234ecbaa53d91fe1655855 Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Fri, 15 Nov 2024 16:40:21 -0600 Subject: [PATCH 2/7] 
update bulk create report and v2 test data --- rorapi/common/csv_bulk.py | 7 ++-- .../data/test_data_create_valid.json | 4 +++ .../data/test_data_new_record_invalid_v2.json | 4 +++ .../data/test_data_new_record_valid_v2.json | 4 +++ .../data/test_data_retrieve_es7_v2.json | 4 +++ .../data/test_data_search_es7_v2.json | 4 +++ .../tests_unit/data/test_update_valid.json | 4 +++ rorapi/tests/tests_unit/tests_models_v2.py | 32 +++++++++++++++---- 8 files changed, 55 insertions(+), 8 deletions(-) diff --git a/rorapi/common/csv_bulk.py b/rorapi/common/csv_bulk.py index ff42d70..89abc63 100644 --- a/rorapi/common/csv_bulk.py +++ b/rorapi/common/csv_bulk.py @@ -48,7 +48,7 @@ def process_csv(csv_file, version, validate_only): success_msg = None error = None report = [] - report_fields = ['row', 'ror_id', 'action', 'errors'] + report_fields = ['row', 'issue_url', 'ror_id', 'action', 'errors'] skipped_count = 0 updated_count = 0 new_count = 0 @@ -57,10 +57,13 @@ def process_csv(csv_file, version, validate_only): reader = csv.DictReader(io.StringIO(read_file)) row_num = 2 for row in reader: + html_url = None ror_id = None updated = False print("Row data") print(row) + if row['html_url']: + html_url = row['html_url'] if row['id']: ror_id = row['id'] updated = True @@ -86,7 +89,7 @@ def process_csv(csv_file, version, validate_only): skipped_count += 1 if validate_only and action == 'created': ror_id = None - report.append({"row": row_num, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''}) + report.append({"row": row_num, "html_url": html_url, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''}) row_num += 1 if new_count > 0 or updated_count > 0 or skipped_count > 0: try: diff --git a/rorapi/tests/tests_unit/data/test_data_create_valid.json b/rorapi/tests/tests_unit/data/test_data_create_valid.json index 533d944..1ba02be 100644 --- 
a/rorapi/tests/tests_unit/data/test_data_create_valid.json +++ b/rorapi/tests/tests_unit/data/test_data_create_valid.json @@ -3,8 +3,12 @@ { "geonames_id": 2661552, "geonames_details": { + "continent_code": "EU", + "contient_name": "Europe", "country_code": "CH", "country_name": "Switzerland", + "country_subdivision_code": "BE", + "country_subdivision_name": "Bern", "lat": 46.94809, "lng": 7.44744, "name": "Bern" diff --git a/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json b/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json index 61f3f2a..440ccb2 100644 --- a/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json @@ -3,8 +3,12 @@ { "geonames_id": 2661552, "geonames_details": { + "continent_code": "EU", + "contient_name": "Europe", "country_code": "CH", "country_name": "Switzerland", + "country_subdivision_code": "BE", + "country_subdivision_name": "Bern", "lat": 46.94809, "lng": 7.44744, "name": "Bern" diff --git a/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json b/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json index a349173..6b9fdc1 100644 --- a/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json @@ -3,8 +3,12 @@ { "geonames_id": 2661552, "geonames_details": { + "continent_code": "EU", + "contient_name": "Europe", "country_code": "CH", "country_name": "Switzerland", + "country_subdivision_code": "BE", + "country_subdivision_name": "Bern", "lat": 46.94809, "lng": 7.44744, "name": "Bern" diff --git a/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json b/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json index 36a934f..39c207b 100644 --- a/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json @@ -72,8 +72,12 @@ "locations": [ { "geonames_details": { + 
"continent_code": "NA", + "contient_name": "North America", "country_code": "US", "country_name": "United States", + "country_subdivision_code": "DC", + "country_subdivision_name": "District of Columbia", "lat": 38.88758, "lng": -77.025728, "name": "Washington, D.C." diff --git a/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json b/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json index 8b41cbc..7d092f3 100644 --- a/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json @@ -24,8 +24,12 @@ { "geonames_id": 2158177, "geonames_details": { + "continent_code": "OC", + "contient_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -37.9083, "lng": 145.138, "name": "Melbourne" diff --git a/rorapi/tests/tests_unit/data/test_update_valid.json b/rorapi/tests/tests_unit/data/test_update_valid.json index 533d944..1ba02be 100644 --- a/rorapi/tests/tests_unit/data/test_update_valid.json +++ b/rorapi/tests/tests_unit/data/test_update_valid.json @@ -3,8 +3,12 @@ { "geonames_id": 2661552, "geonames_details": { + "continent_code": "EU", + "contient_name": "Europe", "country_code": "CH", "country_name": "Switzerland", + "country_subdivision_code": "BE", + "country_subdivision_name": "Bern", "lat": 46.94809, "lng": 7.44744, "name": "Bern" diff --git a/rorapi/tests/tests_unit/tests_models_v2.py b/rorapi/tests/tests_unit/tests_models_v2.py index b0f9958..d18bc1f 100644 --- a/rorapi/tests/tests_unit/tests_models_v2.py +++ b/rorapi/tests/tests_unit/tests_models_v2.py @@ -30,11 +30,15 @@ def test_attributes_exist(self): { "geonames_id": 5911606, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", + "country_name": "Gallifrey", + "country_code": "GE", + "country_subdivision_code": "BU", + "country_subdivision_name": "Burnaby State", "lat": "49.198027", "lng": "-123.007714", "name": 
"Burnaby", - "country_name": "Gallifrey", - "country_code": "GE", }, } ], @@ -71,12 +75,12 @@ def test_attributes_exist(self): self.assertIn(organization.types[i], data["types"]) self.assertEqual(organization.established, data["established"]) self.assertEqual( - organization.locations[0].geonames_details.lat, - data["locations"][0]["geonames_details"]["lat"], + organization.locations[0].geonames_details.continent_code, + data["locations"][0]["geonames_details"]["continent_code"], ) self.assertEqual( - organization.locations[0].geonames_details.lng, - data["locations"][0]["geonames_details"]["lng"], + organization.locations[0].geonames_details.continent_name, + data["locations"][0]["geonames_details"]["continent_name"], ) self.assertEqual( organization.locations[0].geonames_details.country_code, @@ -86,6 +90,22 @@ def test_attributes_exist(self): organization.locations[0].geonames_details.country_name, data["locations"][0]["geonames_details"]["country_name"], ) + self.assertEqual( + organization.locations[0].geonames_details.country_subdivision_code, + data["locations"][0]["geonames_details"]["country_subdivision_code"], + ) + self.assertEqual( + organization.locations[0].geonames_details.country_subdivision_name, + data["locations"][0]["geonames_details"]["country_subdivision_name"], + ) + self.assertEqual( + organization.locations[0].geonames_details.lat, + data["locations"][0]["geonames_details"]["lat"], + ) + self.assertEqual( + organization.locations[0].geonames_details.lng, + data["locations"][0]["geonames_details"]["lng"], + ) self.assertEqual( organization.locations[0].geonames_details.name, data["locations"][0]["geonames_details"]["name"], From 5c5d60c2e197cfd2745a7ddca6b4d2683b6adfeb Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Mon, 18 Nov 2024 16:44:55 -0600 Subject: [PATCH 3/7] fix unit tests for v2.1 locations update --- rorapi/common/views.py | 2 +- .../data/test_data_retrieve_es7_v2.json | 2 +- .../data/test_data_search_es7_v2.json | 78 
++++++++++++++++++- rorapi/tests/tests_unit/tests_queries_v1.py | 3 +- rorapi/tests/tests_unit/tests_queries_v2.py | 3 +- rorapi/v2/models.py | 8 +- 6 files changed, 87 insertions(+), 9 deletions(-) diff --git a/rorapi/common/views.py b/rorapi/common/views.py index 160ff74..e163012 100644 --- a/rorapi/common/views.py +++ b/rorapi/common/views.py @@ -184,7 +184,7 @@ def get(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): class GenerateAddress(APIView): - #permission_classes = [OurTokenPermission] + permission_classes = [OurTokenPermission] def get(self, request, geonamesid, version=REST_FRAMEWORK["DEFAULT_VERSION"]): if version == 'v2': diff --git a/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json b/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json index 39c207b..dff1b2a 100644 --- a/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json @@ -73,7 +73,7 @@ { "geonames_details": { "continent_code": "NA", - "contient_name": "North America", + "continent_name": "North America", "country_code": "US", "country_name": "United States", "country_subdivision_code": "DC", diff --git a/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json b/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json index 7d092f3..4ce8d6f 100644 --- a/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json @@ -25,7 +25,7 @@ "geonames_id": 2158177, "geonames_details": { "continent_code": "OC", - "contient_name": "Oceania", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", "country_subdivision_code": "VIC", @@ -224,8 +224,12 @@ { "geonames_id": 2174003, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -27.495964, "lng": 153.009627, 
"name": "Brisbane" @@ -408,8 +412,12 @@ { "geonames_id": 2147714, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -33.775259, "lng": 151.112915, "name": "Sydney" @@ -539,8 +547,12 @@ { "geonames_id": 2171507, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -34.405147, "lng": 150.878387, "name": "Wollongong" @@ -695,8 +707,12 @@ { "geonames_id": 2163355, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -42.902093, "lng": 147.332497, "name": "Hobart" @@ -845,8 +861,12 @@ { "geonames_id": 2078025, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -35.024038, "lng": 138.572615, "name": "Adelaide" @@ -995,8 +1015,12 @@ { "geonames_id": 2158177, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -37.806748, "lng": 144.962573, "name": "Melbourne" @@ -1154,8 +1178,12 @@ { "geonames_id": 2158177, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -37.72179, "lng": 145.047909, "name": "Melbourne" @@ -1280,8 +1308,12 @@ { "geonames_id": 2158177, "geonames_details": { + "continent_code": "OC", + 
"continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -37.794167, "lng": 144.9, "name": "Melbourne" @@ -1400,8 +1432,12 @@ { "geonames_id": 2177671, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -30.489949, "lng": 151.641018, "name": "Armidale" @@ -1541,8 +1577,12 @@ { "geonames_id": 7932622, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -37.8493578, "lng": 145.1127097, "name": "Burwood" @@ -1670,8 +1710,12 @@ { "geonames_id": 2174003, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -27.470609, "lng": 153.022851, "name": "Brisbane" @@ -1804,8 +1848,12 @@ { "geonames_id": 2151437, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -23.322705, "lng": 150.520802, "name": "Rockhampton" @@ -1934,8 +1982,12 @@ { "geonames_id": 2078025, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -34.92049, "lng": 138.60678, "name": "Adelaide" @@ -2075,8 +2127,12 @@ { "geonames_id": 2158177, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": 
"VIC", + "country_subdivision_name": "Victoria", "lat": -37.821111, "lng": 145.038333, "name": "Melbourne" @@ -2209,8 +2265,12 @@ { "geonames_id": 2165087, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -28.073983, "lng": 153.416489, "name": "Gold Coast" @@ -2330,8 +2390,12 @@ { "geonames_id": 2176632, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -33.4300248, "lng": 149.5655646, "name": "Bathurst" @@ -2456,8 +2520,12 @@ { "geonames_id": 2172517, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -35.238889, "lng": 149.0875, "name": "Canberra" @@ -2587,8 +2655,12 @@ { "geonames_id": 2177091, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -37.5761, "lng": 143.846, "name": "Ballarat" @@ -2701,8 +2773,12 @@ { "geonames_id": 2073124, "geonames_details": { + "continent_code": "OC", + "continent_name": "Oceania", "country_code": "AU", "country_name": "Australia", + "country_subdivision_code": "VIC", + "country_subdivision_name": "Victoria", "lat": -12.3716524297692, "lng": 130.868711471558, "name": "Darwin" diff --git a/rorapi/tests/tests_unit/tests_queries_v1.py b/rorapi/tests/tests_unit/tests_queries_v1.py index 5030c39..e85cf62 100644 --- a/rorapi/tests/tests_unit/tests_queries_v1.py +++ b/rorapi/tests/tests_unit/tests_queries_v1.py @@ -173,7 +173,8 @@ def setUp(self): self.default_query = \ {'aggs': 
{'types': {'terms': {'field': 'types', 'size': 10, 'min_doc_count': 1}}, 'countries': {'terms': {'field': 'country.country_code', 'size': 10, 'min_doc_count': 1}}, - 'statuses': {'terms': {'field': 'status', 'size': 10, 'min_doc_count': 1}}}, 'track_total_hits': True, 'from': 0, 'size': 20} + 'statuses': {'terms': {'field': 'status', 'size': 10, 'min_doc_count': 1}}}, + 'sort': [{'id': {'order': 'asc'}}], 'track_total_hits': True, 'from': 0, 'size': 20} def test_empty_query_default(self): expected = {'query': { diff --git a/rorapi/tests/tests_unit/tests_queries_v2.py b/rorapi/tests/tests_unit/tests_queries_v2.py index b3bca45..3351aa6 100644 --- a/rorapi/tests/tests_unit/tests_queries_v2.py +++ b/rorapi/tests/tests_unit/tests_queries_v2.py @@ -173,7 +173,8 @@ def setUp(self): self.default_query = \ {'aggs': {'types': {'terms': {'field': 'types', 'size': 10, 'min_doc_count': 1}}, 'countries': {'terms': {'field': 'locations.geonames_details.country_code', 'size': 10, 'min_doc_count': 1}}, - 'statuses': {'terms': {'field': 'status', 'size': 10, 'min_doc_count': 1}}}, 'track_total_hits': True, 'from': 0, 'size': 20} + 'statuses': {'terms': {'field': 'status', 'size': 10, 'min_doc_count': 1}}}, + 'sort': [{'id': {'order': 'asc'}}], 'track_total_hits': True, 'from': 0, 'size': 20} def test_empty_query_default(self): expected = {'query': { diff --git a/rorapi/v2/models.py b/rorapi/v2/models.py index 4c14917..1c4b8bf 100644 --- a/rorapi/v2/models.py +++ b/rorapi/v2/models.py @@ -6,12 +6,12 @@ class GeoNamesDetails: """A model class for storing geonames city hash""" def __init__(self, data): - self.continent_code = data.country_code - self.continent_name = data.country_name + self.continent_code = data.continent_code + self.continent_name = data.continent_name self.country_code = data.country_code self.country_name = data.country_name - self.country_subdivision_code = data.country_code - self.country_subdivision_name = data.country_name + self.country_subdivision_code = 
data.country_subdivision_code + self.country_subdivision_name = data.country_subdivision_name self.name = data.name self.lat = data.lat self.lng = data.lng From c00dcd590ee8bf88c4f5f5fbd7844ad6ff1e2097 Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Tue, 19 Nov 2024 17:39:48 -0600 Subject: [PATCH 4/7] add new aggregations and update tests --- rorapi/common/models.py | 9 +++ rorapi/common/queries.py | 12 ++- rorapi/common/serializers.py | 6 -- .../data/test_data_search_es7_v2.json | 10 +++ .../tests/tests_unit/tests_models_common.py | 62 +------------- rorapi/tests/tests_unit/tests_models_v1.py | 70 +++++++++++++++- rorapi/tests/tests_unit/tests_models_v2.py | 81 ++++++++++++++++++- rorapi/tests/tests_unit/tests_queries_v2.py | 9 +++ rorapi/tests/tests_unit/tests_views_v2.py | 8 ++ rorapi/v1/models.py | 18 ++++- rorapi/v1/serializers.py | 7 +- rorapi/v2/models.py | 41 ++++++++-- rorapi/v2/record_constants.py | 14 ++++ rorapi/v2/serializers.py | 10 ++- 14 files changed, 276 insertions(+), 81 deletions(-) diff --git a/rorapi/common/models.py b/rorapi/common/models.py index 88b050d..d0e8140 100644 --- a/rorapi/common/models.py +++ b/rorapi/common/models.py @@ -30,6 +30,14 @@ def __init__(self, data): pass self.count = data.doc_count +class ContinentBucket: + """A model class for country aggregation bucket""" + + def __init__(self, data): + self.id = data.key.lower() + self.title = data.key + self.count = data.doc_count + class StatusBucket: """A model class for status aggregation bucket""" @@ -46,6 +54,7 @@ class Aggregations: def __init__(self, data): self.types = [TypeBucket(b) for b in data.types.buckets] self.countries = [CountryBucket(b) for b in data.countries.buckets] + self.continents = [ContinentBucket(b) for b in data.continents.buckets] self.statuses = [StatusBucket(b) for b in data.statuses.buckets] diff --git a/rorapi/common/queries.py b/rorapi/common/queries.py index a535564..812098e 100644 --- a/rorapi/common/queries.py +++ b/rorapi/common/queries.py 
@@ -18,7 +18,8 @@ from urllib.parse import unquote -ALLOWED_FILTERS = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status") +ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status") +ALLOWED_FILTERS_V2 = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status", "locations.geonames_details.continent_code", "locations.geonames_details.continent_name") ALLOWED_PARAM_KEYS = ("query", "page", "filter", "query.advanced", "all_status") ALLOWED_ALL_STATUS_VALUES = ("", "true", "false") # includes deprecated ext id types @@ -157,6 +158,14 @@ def check_status_adv_q(adv_q_string): def filter_string_to_list(filter_string, version): filter_list = [] + if "continent_code" in filter_string and version == "v2": + filter_string = filter_string.replace( + "continent_code", "locations.geonames_details.continent_code" + ) + if "continent_name" in filter_string and version == "v2": + filter_string = filter_string.replace( + "continent_name", "locations.geonames_details.continent_name" + ) if "country.country_code" in filter_string and version == "v2": filter_string = filter_string.replace( "country.country_code", "locations.geonames_details.country_code" @@ -327,6 +336,7 @@ def build_search_query(params, version): [ ("types", "types"), ("countries", "locations.geonames_details.country_code"), + ("continents", "locations.geonames_details.continent_code"), ("statuses", "status"), ] ) diff --git a/rorapi/common/serializers.py b/rorapi/common/serializers.py index aa49ca8..ebf0cf3 100644 --- a/rorapi/common/serializers.py +++ b/rorapi/common/serializers.py @@ -13,11 +13,5 @@ class BucketSerializer(serializers.Serializer): count = serializers.IntegerField() -class AggregationsSerializer(serializers.Serializer): - types = BucketSerializer(many=True) - countries = 
BucketSerializer(many=True) - statuses = BucketSerializer(many=True) - - class ErrorsSerializer(serializers.Serializer): errors = serializers.StringRelatedField(many=True) diff --git a/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json b/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json index 4ce8d6f..c25ed4e 100644 --- a/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json +++ b/rorapi/tests/tests_unit/data/test_data_search_es7_v2.json @@ -2953,6 +2953,16 @@ "doc_count_error_upper_bound": 0, "sum_other_doc_count": 0, "buckets": [] + }, + "continents": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "OC", + "doc_count": 20 + } + ] } } } \ No newline at end of file diff --git a/rorapi/tests/tests_unit/tests_models_common.py b/rorapi/tests/tests_unit/tests_models_common.py index d0c9dd8..a0afb3e 100644 --- a/rorapi/tests/tests_unit/tests_models_common.py +++ b/rorapi/tests/tests_unit/tests_models_common.py @@ -1,6 +1,6 @@ from django.test import SimpleTestCase -from rorapi.common.models import Aggregations, CountryBucket, Entity, Errors, TypeBucket +from rorapi.common.models import CountryBucket, Entity, Errors, TypeBucket from .utils import AttrDict @@ -39,66 +39,6 @@ def test_attributes_exist(self): self.assertEqual(bucket.count, 4821) -class AggregationsTestCase(SimpleTestCase): - def test_attributes_exist(self): - aggr = Aggregations( - AttrDict( - { - "types": { - "buckets": [ - {"key": "TyPE 1", "doc_count": 482}, - {"key": "Type2", "doc_count": 42}, - ] - }, - "countries": { - "buckets": [ - {"key": "IE", "doc_count": 48212}, - {"key": "FR", "doc_count": 4821}, - {"key": "GB", "doc_count": 482}, - {"key": "US", "doc_count": 48}, - ] - }, - "statuses": { - "buckets": [ - {"key": "active", "doc_count": 102927}, - {"key": "inactive", "doc_count": 3}, - {"key": "withdrawn", "doc_count": 2}, - ] - }, - } - ) - ) - self.assertEqual(len(aggr.types), 2) - self.assertEqual(aggr.types[0].id, "type 1") - 
self.assertEqual(aggr.types[0].title, "TyPE 1") - self.assertEqual(aggr.types[0].count, 482) - self.assertEqual(aggr.types[1].id, "type2") - self.assertEqual(aggr.types[1].title, "Type2") - self.assertEqual(aggr.types[1].count, 42) - self.assertEqual(len(aggr.countries), 4) - self.assertEqual(aggr.countries[0].id, "ie") - self.assertEqual(aggr.countries[0].title, "Ireland") - self.assertEqual(aggr.countries[0].count, 48212) - self.assertEqual(aggr.countries[1].id, "fr") - self.assertEqual(aggr.countries[1].title, "France") - self.assertEqual(aggr.countries[1].count, 4821) - self.assertEqual(aggr.countries[2].id, "gb") - self.assertEqual(aggr.countries[2].title, "United Kingdom") - self.assertEqual(aggr.countries[2].count, 482) - self.assertEqual(aggr.countries[3].id, "us") - self.assertEqual(aggr.countries[3].title, "United States") - self.assertEqual(aggr.countries[3].count, 48) - self.assertEqual(aggr.statuses[0].id, "active") - self.assertEqual(aggr.statuses[0].title, "active") - self.assertEqual(aggr.statuses[0].count, 102927) - self.assertEqual(aggr.statuses[1].id, "inactive") - self.assertEqual(aggr.statuses[1].title, "inactive") - self.assertEqual(aggr.statuses[1].count, 3) - self.assertEqual(aggr.statuses[2].id, "withdrawn") - self.assertEqual(aggr.statuses[2].title, "withdrawn") - self.assertEqual(aggr.statuses[2].count, 2) - - class ErrorsTestCase(SimpleTestCase): def test_attributes_exist(self): data = ["err1", "e2", "terrible error 3"] diff --git a/rorapi/tests/tests_unit/tests_models_v1.py b/rorapi/tests/tests_unit/tests_models_v1.py index b4cb7ab..65f9c43 100644 --- a/rorapi/tests/tests_unit/tests_models_v1.py +++ b/rorapi/tests/tests_unit/tests_models_v1.py @@ -1,9 +1,10 @@ from django.test import SimpleTestCase from rorapi.v1.models import ( + Aggregations, ExternalIds, Organization, - MatchedOrganization, + MatchedOrganization ) from .utils import AttrDict @@ -194,3 +195,70 @@ def test_attributes_exist(self): self.assertEqual(organization.chosen, 
data["chosen"]) self.assertEqual(organization.organization.id, data["organization"]["id"]) self.assertEqual(organization.organization.name, data["organization"]["name"]) + +class AggregationsTestCase(SimpleTestCase): + def test_attributes_exist(self): + aggr = Aggregations( + AttrDict( + { + "types": { + "buckets": [ + {"key": "TyPE 1", "doc_count": 482}, + {"key": "Type2", "doc_count": 42}, + ] + }, + "countries": { + "buckets": [ + {"key": "IE", "doc_count": 48212}, + {"key": "FR", "doc_count": 4821}, + {"key": "GB", "doc_count": 482}, + {"key": "US", "doc_count": 48}, + ] + }, + "continentss": { + "buckets": [ + {"key": "AF", "doc_count": 48212}, + {"key": "AS", "doc_count": 4821}, + {"key": "EU", "doc_count": 482}, + {"key": "NA", "doc_count": 48}, + ] + }, + "statuses": { + "buckets": [ + {"key": "active", "doc_count": 102927}, + {"key": "inactive", "doc_count": 3}, + {"key": "withdrawn", "doc_count": 2}, + ] + }, + } + ) + ) + self.assertEqual(len(aggr.types), 2) + self.assertEqual(aggr.types[0].id, "type 1") + self.assertEqual(aggr.types[0].title, "TyPE 1") + self.assertEqual(aggr.types[0].count, 482) + self.assertEqual(aggr.types[1].id, "type2") + self.assertEqual(aggr.types[1].title, "Type2") + self.assertEqual(aggr.types[1].count, 42) + self.assertEqual(len(aggr.countries), 4) + self.assertEqual(aggr.countries[0].id, "ie") + self.assertEqual(aggr.countries[0].title, "Ireland") + self.assertEqual(aggr.countries[0].count, 48212) + self.assertEqual(aggr.countries[1].id, "fr") + self.assertEqual(aggr.countries[1].title, "France") + self.assertEqual(aggr.countries[1].count, 4821) + self.assertEqual(aggr.countries[2].id, "gb") + self.assertEqual(aggr.countries[2].title, "United Kingdom") + self.assertEqual(aggr.countries[2].count, 482) + self.assertEqual(aggr.countries[3].id, "us") + self.assertEqual(aggr.countries[3].title, "United States") + self.assertEqual(aggr.countries[3].count, 48) + self.assertEqual(aggr.statuses[0].id, "active") + 
self.assertEqual(aggr.statuses[0].title, "active") + self.assertEqual(aggr.statuses[0].count, 102927) + self.assertEqual(aggr.statuses[1].id, "inactive") + self.assertEqual(aggr.statuses[1].title, "inactive") + self.assertEqual(aggr.statuses[1].count, 3) + self.assertEqual(aggr.statuses[2].id, "withdrawn") + self.assertEqual(aggr.statuses[2].title, "withdrawn") + self.assertEqual(aggr.statuses[2].count, 2) diff --git a/rorapi/tests/tests_unit/tests_models_v2.py b/rorapi/tests/tests_unit/tests_models_v2.py index d18bc1f..291472f 100644 --- a/rorapi/tests/tests_unit/tests_models_v2.py +++ b/rorapi/tests/tests_unit/tests_models_v2.py @@ -1,6 +1,6 @@ from django.test import SimpleTestCase -from rorapi.v2.models import Organization, MatchedOrganization +from rorapi.v2.models import Aggregations, Organization, MatchedOrganization from .utils import AttrDict @@ -173,3 +173,82 @@ def test_attributes_exist(self): for i, name in enumerate(organization.organization.names): matched_names = [n for n in data["organization"]["names"] if n['value']==organization.organization.names[i].value and n['types']==organization.organization.names[i].types and organization.organization.names[i].lang] self.assertTrue(len(matched_names) == 1) + +class AggregationsTestCase(SimpleTestCase): + def test_attributes_exist(self): + aggr = Aggregations( + AttrDict( + { + "types": { + "buckets": [ + {"key": "TyPE 1", "doc_count": 482}, + {"key": "Type2", "doc_count": 42}, + ] + }, + "countries": { + "buckets": [ + {"key": "IE", "doc_count": 48212}, + {"key": "FR", "doc_count": 4821}, + {"key": "GB", "doc_count": 482}, + {"key": "US", "doc_count": 48}, + ] + }, + "continents": { + "buckets": [ + {"key": "AF", "doc_count": 48212}, + {"key": "AS", "doc_count": 4821}, + {"key": "EU", "doc_count": 482}, + {"key": "NA", "doc_count": 48}, + ] + }, + "statuses": { + "buckets": [ + {"key": "active", "doc_count": 102927}, + {"key": "inactive", "doc_count": 3}, + {"key": "withdrawn", "doc_count": 2}, + ] + }, + } 
+ ) + ) + self.assertEqual(len(aggr.types), 2) + self.assertEqual(aggr.types[0].id, "type 1") + self.assertEqual(aggr.types[0].title, "TyPE 1") + self.assertEqual(aggr.types[0].count, 482) + self.assertEqual(aggr.types[1].id, "type2") + self.assertEqual(aggr.types[1].title, "Type2") + self.assertEqual(aggr.types[1].count, 42) + self.assertEqual(len(aggr.countries), 4) + self.assertEqual(aggr.countries[0].id, "ie") + self.assertEqual(aggr.countries[0].title, "Ireland") + self.assertEqual(aggr.countries[0].count, 48212) + self.assertEqual(aggr.countries[1].id, "fr") + self.assertEqual(aggr.countries[1].title, "France") + self.assertEqual(aggr.countries[1].count, 4821) + self.assertEqual(aggr.countries[2].id, "gb") + self.assertEqual(aggr.countries[2].title, "United Kingdom") + self.assertEqual(aggr.countries[2].count, 482) + self.assertEqual(aggr.countries[3].id, "us") + self.assertEqual(aggr.countries[3].title, "United States") + self.assertEqual(aggr.countries[3].count, 48) + self.assertEqual(aggr.continents[0].id, "af") + self.assertEqual(aggr.continents[0].title, "Africa") + self.assertEqual(aggr.continents[0].count, 48212) + self.assertEqual(aggr.continents[1].id, "as") + self.assertEqual(aggr.continents[1].title, "Asia") + self.assertEqual(aggr.continents[1].count, 4821) + self.assertEqual(aggr.continents[2].id, "eu") + self.assertEqual(aggr.continents[2].title, "Europe") + self.assertEqual(aggr.continents[2].count, 482) + self.assertEqual(aggr.continents[3].id, "na") + self.assertEqual(aggr.continents[3].title, "North America") + self.assertEqual(aggr.continents[3].count, 48) + self.assertEqual(aggr.statuses[0].id, "active") + self.assertEqual(aggr.statuses[0].title, "active") + self.assertEqual(aggr.statuses[0].count, 102927) + self.assertEqual(aggr.statuses[1].id, "inactive") + self.assertEqual(aggr.statuses[1].title, "inactive") + self.assertEqual(aggr.statuses[1].count, 3) + self.assertEqual(aggr.statuses[2].id, "withdrawn") + 
self.assertEqual(aggr.statuses[2].title, "withdrawn") + self.assertEqual(aggr.statuses[2].count, 2) diff --git a/rorapi/tests/tests_unit/tests_queries_v2.py b/rorapi/tests/tests_unit/tests_queries_v2.py index 3351aa6..9084678 100644 --- a/rorapi/tests/tests_unit/tests_queries_v2.py +++ b/rorapi/tests/tests_unit/tests_queries_v2.py @@ -173,6 +173,7 @@ def setUp(self): self.default_query = \ {'aggs': {'types': {'terms': {'field': 'types', 'size': 10, 'min_doc_count': 1}}, 'countries': {'terms': {'field': 'locations.geonames_details.country_code', 'size': 10, 'min_doc_count': 1}}, + 'continents': {'terms': {'field': 'locations.geonames_details.continent_code', 'size': 10, 'min_doc_count': 1}}, 'statuses': {'terms': {'field': 'status', 'size': 10, 'min_doc_count': 1}}}, 'sort': [{'id': {'order': 'asc'}}], 'track_total_hits': True, 'from': 0, 'size': 20} @@ -559,6 +560,14 @@ def test_search_organizations(self, search_mock): self.test_data['aggregations']['countries']['buckets']): self.assertEquals(ret.id, exp['key'].lower()) self.assertEquals(ret.count, exp['doc_count']) + self.assertEquals( + len(organizations.meta.continents), + len(self.test_data['aggregations']['continents']['buckets'])) + for ret, exp in \ + zip(organizations.meta.continents, + self.test_data['aggregations']['continents']['buckets']): + self.assertEquals(ret.id, exp['key'].lower()) + self.assertEquals(ret.count, exp['doc_count']) self.assertEquals( len(organizations.meta.statuses), len(self.test_data['aggregations']['statuses']['buckets'])) diff --git a/rorapi/tests/tests_unit/tests_views_v2.py b/rorapi/tests/tests_unit/tests_views_v2.py index a36325e..c7aa1c5 100644 --- a/rorapi/tests/tests_unit/tests_views_v2.py +++ b/rorapi/tests/tests_unit/tests_views_v2.py @@ -54,6 +54,14 @@ def test_search_organizations(self, search_mock): self.test_data['aggregations']['countries']['buckets']): self.assertEquals(ret['id'], exp['key'].lower()) self.assertEquals(ret['count'], exp['doc_count']) + 
self.assertEquals( + len(organizations['meta']['continents']), + len(self.test_data['aggregations']['continents']['buckets'])) + for ret, exp in \ + zip(organizations['meta']['continents'], + self.test_data['aggregations']['continents']['buckets']): + self.assertEquals(ret['id'], exp['key'].lower()) + self.assertEquals(ret['count'], exp['doc_count']) @mock.patch('elasticsearch_dsl.Search.execute') def test_invalid_search_organizations(self, search_mock): diff --git a/rorapi/v1/models.py b/rorapi/v1/models.py index 2f7c2fd..0e84f03 100644 --- a/rorapi/v1/models.py +++ b/rorapi/v1/models.py @@ -1,5 +1,21 @@ from geonamescache.mappers import country -from rorapi.common.models import Aggregations, Entity +from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity + +class ContinentBucket: + """A model class for country aggregation bucket""" + + def __init__(self, data): + self.id = data.key.lower() + self.title = data.key + self.count = data.doc_count + +class Aggregations: + """Aggregations model class""" + + def __init__(self, data): + self.types = [TypeBucket(b) for b in data.types.buckets] + self.countries = [CountryBucket(b) for b in data.countries.buckets] + self.statuses = [StatusBucket(b) for b in data.statuses.buckets] class GeoAdmin: def __init__(self, data): diff --git a/rorapi/v1/serializers.py b/rorapi/v1/serializers.py index 241028a..41fe9be 100644 --- a/rorapi/v1/serializers.py +++ b/rorapi/v1/serializers.py @@ -1,5 +1,10 @@ from rest_framework import serializers -from rorapi.common.serializers import AggregationsSerializer, OrganizationRelationshipsSerializer +from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer + +class AggregationsSerializer(serializers.Serializer): + types = BucketSerializer(many=True) + countries = BucketSerializer(many=True) + statuses = BucketSerializer(many=True) class OrganizationLabelSerializer(serializers.Serializer): diff --git a/rorapi/v2/models.py 
b/rorapi/v2/models.py index 1c4b8bf..4acfafe 100644 --- a/rorapi/v2/models.py +++ b/rorapi/v2/models.py @@ -1,17 +1,48 @@ from geonamescache.mappers import country -from rorapi.common.models import Aggregations, Entity +from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity +from rorapi.v2.record_constants import continent_code_to_name +class ContinentBucket: + """A model class for continent aggregation bucket""" + + def __init__(self, data): + self.id = data.key.lower() + self.title = continent_code_to_name(data.key) + self.count = data.doc_count + +class CountryBucket: + """A model class for country aggregation bucket""" + + def __init__(self, data): + self.id = data.key.lower() + mapper = country(from_key="iso", to_key="name") + try: + self.title = mapper(data.key) + except AttributeError: + # if we have a country code with no name mapping, skip it to prevent 500 + pass + self.count = data.doc_count + + +class Aggregations: + """Aggregations model class""" + + def __init__(self, data): + self.types = [TypeBucket(b) for b in data.types.buckets] + self.countries = [CountryBucket(b) for b in data.countries.buckets] + self.continents = [ContinentBucket(b) for b in data.continents.buckets] + self.statuses = [StatusBucket(b) for b in data.statuses.buckets] class GeoNamesDetails: """A model class for storing geonames city hash""" def __init__(self, data): - self.continent_code = data.continent_code - self.continent_name = data.continent_name + self.continent_code = getattr(data, 'continent_code', None) + self.continent_name = getattr(data, 'continent_name', None) self.country_code = data.country_code self.country_name = data.country_name - self.country_subdivision_code = data.country_subdivision_code - self.country_subdivision_name = data.country_subdivision_name + self.country_subdivision_code = getattr(data, 'country_subdivision_code', None) + self.country_subdivision_name = getattr(data, 'country_subdivision_name', None) self.name = data.name
self.lat = data.lat self.lng = data.lng diff --git a/rorapi/v2/record_constants.py b/rorapi/v2/record_constants.py index e962615..a6357cb 100644 --- a/rorapi/v2/record_constants.py +++ b/rorapi/v2/record_constants.py @@ -65,3 +65,17 @@ "types": None } +V2_CONTINENT_CODES_NAMES = { + "AF": "Africa", + "AN": "Antarctica", + "AS": "Asia", + "EU": "Europe", + "NA": "North America", + "OC": "Oceania", + "SA": "South America" +} + +def continent_code_to_name(continent_code): + if continent_code.upper() in V2_CONTINENT_CODES_NAMES.keys(): + return V2_CONTINENT_CODES_NAMES[continent_code.upper()] + return None diff --git a/rorapi/v2/serializers.py b/rorapi/v2/serializers.py index 43e109d..62327a5 100644 --- a/rorapi/v2/serializers.py +++ b/rorapi/v2/serializers.py @@ -1,9 +1,11 @@ from rest_framework import serializers -from rorapi.common.serializers import ( - AggregationsSerializer, - OrganizationRelationshipsSerializer, -) +from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer +class AggregationsSerializer(serializers.Serializer): + types = BucketSerializer(many=True) + countries = BucketSerializer(many=True) + continents = BucketSerializer(many=True) + statuses = BucketSerializer(many=True) class AdminDetailsSerializer(serializers.Serializer): date = serializers.DateTimeField() From a9b7f7de38e4c33e0abf0b3215c462f0cf480cba Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Thu, 21 Nov 2024 08:20:39 -0600 Subject: [PATCH 5/7] update filters and integration tests --- rorapi/common/queries.py | 70 ++-- .../{tests.py => tests_v1.py} | 38 +- rorapi/tests/tests_integration/tests_v2.py | 333 ++++++++++++++++++ 3 files changed, 403 insertions(+), 38 deletions(-) rename rorapi/tests/tests_integration/{tests.py => tests_v1.py} (87%) create mode 100644 rorapi/tests/tests_integration/tests_v2.py diff --git a/rorapi/common/queries.py b/rorapi/common/queries.py index 812098e..ba75ac2 100644 --- a/rorapi/common/queries.py +++ 
b/rorapi/common/queries.py @@ -155,42 +155,46 @@ def check_status_adv_q(adv_q_string): status_in_q = True return status_in_q +def get_country_name_filters(country_name_field, filter_string, version): + country_name_filters = [] + if version == "v1": + allowed_filters = ALLOWED_FILTERS_V1 + else: + allowed_filters = ALLOWED_FILTERS_V2 + search = re.findall(country_name_field + ":([^:]*)", filter_string) + if search: + for s in search: + if len(re.findall(",", s)) > 1: + s = s.rsplit(",", 1)[0] + for allowed_filter in allowed_filters: + if allowed_filter in s: + s = s.rsplit("," + allowed_filter, 1)[0] + country_name_filter = country_name_field + ":" + s + filter_string = filter_string.replace(country_name_filter, "") + country_name_filters.append(country_name_filter) + return country_name_filters + def filter_string_to_list(filter_string, version): filter_list = [] - if "continent_code" in filter_string and version == "v2": - filter_string = filter_string.replace( - "continent_code", "locations.geonames_details.continent_code" - ) - if "continent_name" in filter_string and version == "v2": - filter_string = filter_string.replace( - "continent_name", "locations.geonames_details.continent_name" - ) - if "country.country_code" in filter_string and version == "v2": - filter_string = filter_string.replace( - "country.country_code", "locations.geonames_details.country_code" - ) + if version == "v2": + if "country.country_code" in filter_string: + filter_string = filter_string.replace( + "country.country_code", "locations.geonames_details.country_code" + ) + if "country.country_name" in filter_string: + filter_string = filter_string.replace( + "country.country_name", "locations.geonames_details.country_name" + ) # some country names contain comma chars # allow comma chars in country_name filter values only # country.country_name:Germany,types:Company - if "country.country_name" in filter_string: - country_name_filters = [] - search = 
re.findall("country.country_name:([^:]*)", filter_string) - if search: - for s in search: - if len(re.findall(",", s)) > 1: - s = s.rsplit(",", 1)[0] - for allowed_filter in ALLOWED_FILTERS: - if allowed_filter in s: - s = s.rsplit("," + allowed_filter, 1)[0] - country_name_filter = "country.country_name:" + s - v2_country_name_filter = "locations.geonames_details.country_name:" + s - filter_string = filter_string.replace(country_name_filter, "") - if version == "v2": - country_name_filters.append(v2_country_name_filter) - else: - country_name_filters.append(country_name_filter) - + if version == "v1": + country_name_field = "country.country_name" + else: + country_name_field = "locations.geonames_details.country_code" + if country_name_field in filter_string: + country_name_filters = get_country_name_filters(country_name_field, filter_string, version) filter_list = [f for f in filter_string.split(",") if f] filter_list = filter_list + country_name_filters else: @@ -253,7 +257,11 @@ def validate(params, version): valid_filters = [f for f in filters if ":" in f] filter_keys = [f.split(":")[0] for f in valid_filters] - illegal_keys = [v for v in filter_keys if v not in ALLOWED_FILTERS] + if version == "v1": + allowed_filters = ALLOWED_FILTERS_V1 + else: + allowed_filters = ALLOWED_FILTERS_V2 + illegal_keys = [v for v in filter_keys if v not in allowed_filters] errors.extend(["filter key '{}' is illegal".format(k) for k in illegal_keys]) if "page" in params: diff --git a/rorapi/tests/tests_integration/tests.py b/rorapi/tests/tests_integration/tests_v1.py similarity index 87% rename from rorapi/tests/tests_integration/tests.py rename to rorapi/tests/tests_integration/tests_v1.py index 3cbef65..8073ffe 100644 --- a/rorapi/tests/tests_integration/tests.py +++ b/rorapi/tests/tests_integration/tests_v1.py @@ -7,7 +7,7 @@ from django.test import SimpleTestCase from rorapi.settings import ROR_API, ES_VARS -BASE_URL = '{}/organizations'.format( +BASE_URL = 
'{}/v1/organizations'.format( os.environ.get('ROR_BASE_URL', 'http://localhost')) @@ -42,6 +42,12 @@ def verify_full_list(self, output): self.assertTrue('id' in t) self.assertTrue('count' in t) + self.assertTrue('statuses' in output['meta']) + self.assertTrue(len(output['meta']['statuses']) > 0) + for t in output['meta']['statuses']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + def verify_empty(self, output): self.assertEquals(self.get_total(output), 0) self.assertEquals(output['items'], []) @@ -129,6 +135,7 @@ def verify_filtering(self, query): aggregations = requests.get(BASE_URL, query).json()['meta'] t_aggrs = aggregations['types'] c_aggrs = aggregations['countries'] + s_aggrs = aggregations['statuses'] for t_aggr in t_aggrs: filter_string = 'types:{}'.format(t_aggr['title']) @@ -154,9 +161,20 @@ def verify_filtering(self, query): self.assertTrue( any([c_aggr == c for c in output['meta']['countries']])) - for t_aggr, c_aggr in itertools.product(t_aggrs, c_aggrs): - filter_string = 'country.country_code:{},types:{}' \ - .format(c_aggr['id'].upper(), t_aggr['title']) + for s_aggr in s_aggrs: + filter_string = 'status:{}'.format(s_aggr['title']) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), s_aggr['count']) + for i in output['items']: + self.assertTrue(s_aggr['title'] in i['status']) + self.assertTrue(any([s_aggr == s + for s in output['meta']['statuses']])) + + for t_aggr, c_aggr, s_aggr in itertools.product(t_aggrs, c_aggrs, s_aggrs): + filter_string = 'country.country_code:{},types:{},status:{}' \ + .format(c_aggr['id'].upper(), t_aggr['title'], s_aggr['title']) params = dict(query, filter=filter_string) status_code = requests.get(BASE_URL, params).status_code if status_code != 200: @@ -167,10 +185,12 @@ def verify_filtering(self, query): continue self.assertTrue(self.get_total(output) <= t_aggr['count']) self.assertTrue(self.get_total(output) <= 
c_aggr['count']) + self.assertTrue(self.get_total(output) <= s_aggr['count']) for i in output['items']: self.assertTrue(t_aggr['title'] in i['types']) self.assertEquals(c_aggr['id'].upper(), i['country']['country_code']) + self.assertTrue(s_aggr['title'] in i['status']) self.assertTrue( any([t_aggr['id'] == t['id'] for t in output['meta']['types']])) @@ -179,6 +199,9 @@ def verify_filtering(self, query): c_aggr['id'] == c['id'] for c in output['meta']['countries'] ])) + self.assertTrue( + any([s_aggr['id'] == s['id'] + for s in output['meta']['statuses']])) def test_filtering(self): self.verify_filtering({}) @@ -220,9 +243,10 @@ def test_retrieval(self): def test_query_grid_retrieval(self): for test_org in requests.get(BASE_URL).json()['items']: - grid = test_org['external_ids']['GRID']['preferred'] - output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json() - self.verify_single_item(output, test_org) + if 'GRID' in test_org['external_ids'].keys(): + grid = test_org['external_ids']['GRID']['preferred'] + output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json() + self.verify_single_item(output, test_org) def test_error(self): output = requests.get(BASE_URL, { diff --git a/rorapi/tests/tests_integration/tests_v2.py b/rorapi/tests/tests_integration/tests_v2.py new file mode 100644 index 0000000..bc92fb6 --- /dev/null +++ b/rorapi/tests/tests_integration/tests_v2.py @@ -0,0 +1,333 @@ +import itertools +import json +import os +import re +import requests + +from django.test import SimpleTestCase +from rorapi.settings import ROR_API, ES_VARS + +BASE_URL = '{}/v2/organizations'.format( + os.environ.get('ROR_BASE_URL', 'http://localhost')) + + +class APITestCase(SimpleTestCase): + def get_total(self, output): + return output['number_of_results'] + + def get_total_from_query(self, query): + return self.get_total(requests.get(BASE_URL, query).json()) + + def verify_full_list(self, output): + # TODO use JSON schema instead? 
+ for k in ['number_of_results', 'time_taken', 'items', 'meta']: + self.assertTrue(k in output) + + self.assertEquals(len(output['items']), 20) + for i in output['items']: + for k in ['id', 'names']: + self.assertTrue(k in i) + self.assertIsNotNone( + re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', i['id'])) + + self.assertTrue('types' in output['meta']) + self.assertTrue(len(output['meta']['types']) > 0) + for t in output['meta']['types']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + + self.assertTrue('countries' in output['meta']) + self.assertTrue(len(output['meta']['countries']) > 0) + for t in output['meta']['countries']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + + self.assertTrue('statuses' in output['meta']) + self.assertTrue(len(output['meta']['statuses']) > 0) + for t in output['meta']['statuses']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + + self.assertTrue('continents' in output['meta']) + self.assertTrue(len(output['meta']['continents']) >= 0) + for t in output['meta']['continents']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + + def verify_empty(self, output): + self.assertEquals(self.get_total(output), 0) + self.assertEquals(output['items'], []) + self.assertEquals(output['meta'], {'types': [], 'statuses': [],'countries': [],'continents': []}) + + def verify_single_item(self, output, org): + self.assertEquals(self.get_total(output), 1) + self.assertEquals(output['items'][0], org) + + def test_list_organizations(self): + output = requests.get(BASE_URL).json() + + self.verify_full_list(output) + # sanity check + self.assertTrue(self.get_total(output) > 50000) + + def test_query_organizations(self): + total = self.get_total_from_query({}) + + output = requests.get(BASE_URL, {'query': 'university'}).json() + self.verify_full_list(output) + self.assertTrue(self.get_total(output) < total) + + def test_deprecated_queries(self): + for q in [{}, { + 'page': 7 + }, { + 'filter': 
'country.country_code:US' + }, { + 'filter': 'country.country_code:US', + 'page': 3 + }]: + status_code = requests.get(BASE_URL, + dict(q, query='university')).status_code + if status_code != 200: + print("failing query: ", dict(q, query='university')) + output = requests.get(BASE_URL, dict(q, query='university')).json() + del output['time_taken'] + output_deprecated = requests.get( + BASE_URL, dict(q, **{'query.name': 'university'})).json() + del output_deprecated['time_taken'] + self.assertEqual(output_deprecated, output) + + def verify_paging(self, query): + total = self.get_total_from_query(query) + max_page = min(400, int(total / ROR_API['PAGE_SIZE'])) + outputs = [ + requests.get(BASE_URL, dict(query, page=n)).json() + for n in range(1, max_page + 1) + ] + + for output in outputs: + self.verify_full_list(output) + # all responses declare the same number of results + self.assertEquals(len(set([self.get_total(o) for o in outputs])), 1) + # IDs of the items listed are all distinct + self.assertEquals(len(set([o['items'][0]['id'] for o in outputs])), + max_page) + # all responses have the same aggregations + self.assertEquals(len(set([json.dumps(o['meta']) for o in outputs])), + 1) + + def test_paging(self): + self.verify_paging({}) + + self.verify_paging({'query': 'university'}) + self.verify_paging({ + 'query': 'university', + 'filter': 'types:Healthcare' + }) + + def test_iteration(self): + total = 10000 + ids = [] + for page in range(1, ES_VARS['MAX_PAGE'] + 1): + request = requests.get(BASE_URL, {'page': page}) + if request.status_code != 200: + print("failing query: ", {'page': page}) + output = requests.get(BASE_URL, {'page': page}).json() + ids.extend([i['id'] for i in output['items']]) + self.assertEquals(len(ids), total) + self.assertEquals(len(set(ids)), total) + + def verify_filtering(self, query): + aggregations = requests.get(BASE_URL, query).json()['meta'] + t_aggrs = aggregations['types'] + c_aggrs = aggregations['countries'] + s_aggrs = 
aggregations['statuses'] + co_aggrs = aggregations['continents'] + + for t_aggr in t_aggrs: + filter_string = 'types:{}'.format(t_aggr['title']) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), t_aggr['count']) + for i in output['items']: + self.assertTrue(t_aggr['title'] in i['types']) + self.assertTrue(any([t_aggr == t + for t in output['meta']['types']])) + + for c_aggr in c_aggrs: + filter_string = 'locations.geonames_details.country_code:{}' \ + .format(c_aggr['id'].upper()) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), c_aggr['count']) + for i in output['items']: + self.assertEquals(c_aggr['id'].upper(), + i['locations'][0]['geonames_details']['country_code']) + self.assertTrue( + any([c_aggr == c for c in output['meta']['countries']])) + + for s_aggr in s_aggrs: + filter_string = 'status:{}'.format(s_aggr['title']) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), s_aggr['count']) + for i in output['items']: + self.assertTrue(s_aggr['title'] in i['status']) + self.assertTrue(any([s_aggr == s + for s in output['meta']['statuses']])) + + for co_aggr in co_aggrs: + filter_string = 'locations.geonames_details.continent_code:{}' \ + .format(co_aggr['id'].upper()) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), co_aggr['count']) + for i in output['items']: + self.assertEquals(co_aggr['id'].upper(), + i['locations'][0]['geonames_details']['continent_code']) + self.assertTrue( + any([co_aggr == co for co in output['meta']['continents']])) + + + + for t_aggr, c_aggr, s_aggr, co_aggr in itertools.product(t_aggrs, c_aggrs, s_aggrs, co_aggrs): + filter_string = 
'locations.geonames_details.country_code:{},types:{},status:{},locations.geonames_details.continent_code:{}' \ + .format(c_aggr['id'].upper(), t_aggr['title'], s_aggr['title'], co_aggr['id'].upper()) + params = dict(query, filter=filter_string) + status_code = requests.get(BASE_URL, params).status_code + if status_code != 200: + print("failing params: ", params) + output = requests.get(BASE_URL, params).json() + if self.get_total(output) == 0: + self.verify_empty(output) + continue + self.assertTrue(self.get_total(output) <= t_aggr['count']) + self.assertTrue(self.get_total(output) <= c_aggr['count']) + self.assertTrue(self.get_total(output) <= s_aggr['count']) + self.assertTrue(self.get_total(output) <= co_aggr['count']) + for i in output['items']: + self.assertTrue(t_aggr['title'] in i['types']) + self.assertEquals(c_aggr['id'].upper(), + i['locations'][0]['geonames_details']['country_code']) + self.assertTrue(s_aggr['title'] in i['status']) + self.assertEquals(co_aggr['id'].upper(), + i['locations'][0]['geonames_details']['continent_code']) + self.assertTrue( + any([t_aggr['id'] == t['id'] + for t in output['meta']['types']])) + self.assertTrue( + any([ + c_aggr['id'] == c['id'] + for c in output['meta']['countries'] + ])) + self.assertTrue( + any([s_aggr['id'] == s['id'] + for s in output['meta']['statuses']])) + self.assertTrue( + any([ + co_aggr['id'] == co['id'] + for co in output['meta']['continents'] + ])) + + def test_filtering(self): + self.verify_filtering({}) + self.verify_filtering({'query': 'university'}) + + def test_empty_output(self): + output = requests.get(BASE_URL, {'filter': 'types:notatype'}).json() + self.verify_empty(output) + + def test_query_retrieval(self): + for test_org in requests.get(BASE_URL).json()['items']: + for test_id in \ + [test_org['id'], + re.sub('https', 'http', test_org['id']), + re.sub(r'https:\/\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'ror.org%2F', 
test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'http%3A%2F%2Fror.org%2F', + test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'https%3A%2F%2Fror.org%2F', + test_org['id'])]: + output = requests.get(BASE_URL, {'query': test_id}).json() + self.verify_single_item(output, test_org) + + def test_retrieval(self): + for test_org in requests.get(BASE_URL).json()['items']: + for test_id in \ + [test_org['id'], + re.sub('https', 'http', test_org['id']), + re.sub(r'https:\/\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'ror.org%2F', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'http%3A%2F%2Fror.org%2F', + test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'https%3A%2F%2Fror.org%2F', + test_org['id'])]: + output = requests.get(BASE_URL + '/' + test_id).json() + self.assertEquals(output, test_org) + + def test_query_grid_retrieval(self): + for test_org in requests.get(BASE_URL).json()['items']: + for ext_id in test_org['external_ids']: + if ext_id['type'] == 'grid': + grid_id = ext_id['preferred'] + output = requests.get(BASE_URL, {'query': '"' + grid_id + '"'}).json() + self.verify_single_item(output, test_org) + + def test_error(self): + output = requests.get(BASE_URL, { + 'query': 'query', + 'illegal': 'whatever', + 'another': 3 + }).json() + self.assertEquals(len(output['errors']), 2) + self.assertTrue(any(['\'illegal\'' in e for e in output['errors']])) + self.assertTrue(any(['\'another\'' in e for e in output['errors']])) + + output = requests.get(BASE_URL, { + 'query': 'query', + 'filter': 'fi1:e,types:F,f3,field2:44' + }).json() + self.assertEquals(len(output['errors']), 3) + self.assertTrue(any(['\'fi1\'' in e for e in output['errors']])) + self.assertTrue(any(['\'field2\'' in e for e in output['errors']])) + self.assertTrue(any(['\'f3\'' in e for e in output['errors']])) + + output = requests.get(BASE_URL, { + 'query': 'query', + 'page': 'whatever' + }).json() + 
self.assertEquals(len(output['errors']), 1) + self.assertTrue('\'whatever\'' in output['errors'][0]) + + output = requests.get(BASE_URL, { + 'query': 'query', + 'page': '10000' + }).json() + self.assertEquals(len(output['errors']), 1) + self.assertTrue('\'10000\'' in output['errors'][0]) + + output = requests.get( + BASE_URL, { + 'query': 'query', + 'illegal': 'whatever', + 'filter': 'fi1:e,types:F,f3,field2:44', + 'another': 3, + 'page': 'third' + }).json() + self.assertEquals(len(output['errors']), 6) + self.assertTrue(any(['\'illegal\'' in e for e in output['errors']])) + self.assertTrue(any(['\'another\'' in e for e in output['errors']])) + self.assertTrue(any(['\'fi1\'' in e for e in output['errors']])) + self.assertTrue(any(['\'field2\'' in e for e in output['errors']])) + self.assertTrue(any(['\'f3\'' in e for e in output['errors']])) + self.assertTrue(any(['\'third\'' in e for e in output['errors']])) + + output = requests.get(BASE_URL + '/https://ror.org/0qwerty89').json() + self.assertEquals(len(output['errors']), 1) + self.assertTrue('\'https://ror.org/0qwerty89\'' in output['errors'][0]) From fe48347ddbeaecdf302747a972d1f71b296b7f30 Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Thu, 21 Nov 2024 16:46:29 -0600 Subject: [PATCH 6/7] update integration tests and v1 models --- rorapi/common/models.py | 18 -- rorapi/tests/tests_integration/tests.py | 278 ++++++++++++++++++ .../tests_integration/tests_search_v1.py | 2 +- .../tests_integration/tests_search_v2.py | 2 +- rorapi/tests/tests_integration/tests_v2.py | 2 - rorapi/v1/models.py | 8 - 6 files changed, 280 insertions(+), 30 deletions(-) create mode 100644 rorapi/tests/tests_integration/tests.py diff --git a/rorapi/common/models.py b/rorapi/common/models.py index d0e8140..e08dd21 100644 --- a/rorapi/common/models.py +++ b/rorapi/common/models.py @@ -30,14 +30,6 @@ def __init__(self, data): pass self.count = data.doc_count -class ContinentBucket: - """A model class for country aggregation bucket""" - -
def __init__(self, data): - self.id = data.key.lower() - self.title = data.key - self.count = data.doc_count - class StatusBucket: """A model class for status aggregation bucket""" @@ -48,16 +40,6 @@ def __init__(self, data): self.count = data.doc_count -class Aggregations: - """Aggregations model class""" - - def __init__(self, data): - self.types = [TypeBucket(b) for b in data.types.buckets] - self.countries = [CountryBucket(b) for b in data.countries.buckets] - self.continents = [ContinentBucket(b) for b in data.continents.buckets] - self.statuses = [StatusBucket(b) for b in data.statuses.buckets] - - class Errors: """Errors model class""" diff --git a/rorapi/tests/tests_integration/tests.py b/rorapi/tests/tests_integration/tests.py new file mode 100644 index 0000000..75d8ab1 --- /dev/null +++ b/rorapi/tests/tests_integration/tests.py @@ -0,0 +1,278 @@ +import itertools +import json +import os +import re +import requests + +from django.test import SimpleTestCase +from rorapi.settings import ROR_API, ES_VARS + +BASE_URL = '{}/organizations'.format( + os.environ.get('ROR_BASE_URL', 'http://localhost')) + + +class APITestCase(SimpleTestCase): + def get_total(self, output): + return output['number_of_results'] + + def get_total_from_query(self, query): + return self.get_total(requests.get(BASE_URL, query).json()) + + def verify_full_list(self, output): + # TODO use JSON schema instead? 
+ for k in ['number_of_results', 'time_taken', 'items', 'meta']: + self.assertTrue(k in output) + + self.assertEquals(len(output['items']), 20) + for i in output['items']: + for k in ['id', 'name']: + self.assertTrue(k in i) + self.assertIsNotNone( + re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', i['id'])) + + self.assertTrue('types' in output['meta']) + self.assertTrue(len(output['meta']['types']) > 0) + for t in output['meta']['types']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + + self.assertTrue('countries' in output['meta']) + self.assertTrue(len(output['meta']['countries']) > 0) + for t in output['meta']['countries']: + self.assertTrue('id' in t) + self.assertTrue('count' in t) + + def verify_empty(self, output): + self.assertEquals(self.get_total(output), 0) + self.assertEquals(output['items'], []) + self.assertEquals(output['meta'], {'types': [], 'statuses': [],'countries': []}) + + def verify_single_item(self, output, org): + self.assertEquals(self.get_total(output), 1) + self.assertEquals(output['items'][0], org) + + def test_list_organizations(self): + output = requests.get(BASE_URL).json() + + self.verify_full_list(output) + # sanity check + self.assertTrue(self.get_total(output) > 50000) + + def test_query_organizations(self): + total = self.get_total_from_query({}) + + output = requests.get(BASE_URL, {'query': 'university'}).json() + self.verify_full_list(output) + self.assertTrue(self.get_total(output) < total) + + def test_deprecated_queries(self): + for q in [{}, { + 'page': 7 + }, { + 'filter': 'country.country_code:US' + }, { + 'filter': 'country.country_code:US', + 'page': 3 + }]: + status_code = requests.get(BASE_URL, + dict(q, query='university')).status_code + if status_code != 200: + print("failing query: ", dict(q, query='university')) + output = requests.get(BASE_URL, dict(q, query='university')).json() + del output['time_taken'] + output_deprecated = requests.get( + BASE_URL, dict(q, **{'query.name': 
'university'})).json() + del output_deprecated['time_taken'] + self.assertEqual(output_deprecated, output) + + def verify_paging(self, query): + total = self.get_total_from_query(query) + max_page = min(400, int(total / ROR_API['PAGE_SIZE'])) + outputs = [ + requests.get(BASE_URL, dict(query, page=n)).json() + for n in range(1, max_page + 1) + ] + + for output in outputs: + self.verify_full_list(output) + # all responses declare the same number of results + self.assertEquals(len(set([self.get_total(o) for o in outputs])), 1) + # IDs of the items listed are all distinct + self.assertEquals(len(set([o['items'][0]['id'] for o in outputs])), + max_page) + # all responses have the same aggregations + self.assertEquals(len(set([json.dumps(o['meta']) for o in outputs])), + 1) + + def test_paging(self): + self.verify_paging({}) + + self.verify_paging({'query': 'university'}) + self.verify_paging({ + 'query': 'university', + 'filter': 'types:Healthcare' + }) + + def test_iteration(self): + total = 10000 + ids = [] + for page in range(1, ES_VARS['MAX_PAGE'] + 1): + request = requests.get(BASE_URL, {'page': page}) + if request.status_code != 200: + print("failing query: ", {'page': page}) + output = requests.get(BASE_URL, {'page': page}).json() + ids.extend([i['id'] for i in output['items']]) + self.assertEquals(len(ids), total) + self.assertEquals(len(set(ids)), total) + + def verify_filtering(self, query): + aggregations = requests.get(BASE_URL, query).json()['meta'] + t_aggrs = aggregations['types'] + c_aggrs = aggregations['countries'] + + for t_aggr in t_aggrs: + filter_string = 'types:{}'.format(t_aggr['title']) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), t_aggr['count']) + for i in output['items']: + self.assertTrue(t_aggr['title'] in i['types']) + self.assertTrue(any([t_aggr == t + for t in output['meta']['types']])) + + for c_aggr in c_aggrs: + filter_string = 
'country.country_code:{}' \ + .format(c_aggr['id'].upper()) + params = dict(query, filter=filter_string) + output = requests.get(BASE_URL, params).json() + + self.assertEquals(self.get_total(output), c_aggr['count']) + for i in output['items']: + self.assertEquals(c_aggr['id'].upper(), + i['country']['country_code']) + self.assertTrue( + any([c_aggr == c for c in output['meta']['countries']])) + + for t_aggr, c_aggr in itertools.product(t_aggrs, c_aggrs): + filter_string = 'country.country_code:{},types:{}' \ + .format(c_aggr['id'].upper(), t_aggr['title']) + params = dict(query, filter=filter_string) + status_code = requests.get(BASE_URL, params).status_code + if status_code != 200: + print("failing params: ", params) + output = requests.get(BASE_URL, params).json() + if self.get_total(output) == 0: + self.verify_empty(output) + continue + self.assertTrue(self.get_total(output) <= t_aggr['count']) + self.assertTrue(self.get_total(output) <= c_aggr['count']) + for i in output['items']: + self.assertTrue(t_aggr['title'] in i['types']) + self.assertEquals(c_aggr['id'].upper(), + i['country']['country_code']) + self.assertTrue( + any([t_aggr['id'] == t['id'] + for t in output['meta']['types']])) + self.assertTrue( + any([ + c_aggr['id'] == c['id'] + for c in output['meta']['countries'] + ])) + + def test_filtering(self): + self.verify_filtering({}) + self.verify_filtering({'query': 'university'}) + + def test_empty_output(self): + output = requests.get(BASE_URL, {'filter': 'types:notatype'}).json() + self.verify_empty(output) + + def test_query_retrieval(self): + for test_org in requests.get(BASE_URL).json()['items']: + for test_id in \ + [test_org['id'], + re.sub('https', 'http', test_org['id']), + re.sub(r'https:\/\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'ror.org%2F', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'http%3A%2F%2Fror.org%2F', + test_org['id']), + 
re.sub(r'https:\/\/ror.org\/', r'https%3A%2F%2Fror.org%2F', + test_org['id'])]: + output = requests.get(BASE_URL, {'query': test_id}).json() + self.verify_single_item(output, test_org) + + def test_retrieval(self): + for test_org in requests.get(BASE_URL).json()['items']: + for test_id in \ + [test_org['id'], + re.sub('https', 'http', test_org['id']), + re.sub(r'https:\/\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', '', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'ror.org%2F', test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'http%3A%2F%2Fror.org%2F', + test_org['id']), + re.sub(r'https:\/\/ror.org\/', r'https%3A%2F%2Fror.org%2F', + test_org['id'])]: + output = requests.get(BASE_URL + '/' + test_id).json() + self.assertEquals(output, test_org) + + def test_query_grid_retrieval(self): + for test_org in requests.get(BASE_URL).json()['items']: + grid = test_org['external_ids']['GRID']['preferred'] + output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json() + self.verify_single_item(output, test_org) + + def test_error(self): + output = requests.get(BASE_URL, { + 'query': 'query', + 'illegal': 'whatever', + 'another': 3 + }).json() + self.assertEquals(len(output['errors']), 2) + self.assertTrue(any(['\'illegal\'' in e for e in output['errors']])) + self.assertTrue(any(['\'another\'' in e for e in output['errors']])) + + output = requests.get(BASE_URL, { + 'query': 'query', + 'filter': 'fi1:e,types:F,f3,field2:44' + }).json() + self.assertEquals(len(output['errors']), 3) + self.assertTrue(any(['\'fi1\'' in e for e in output['errors']])) + self.assertTrue(any(['\'field2\'' in e for e in output['errors']])) + self.assertTrue(any(['\'f3\'' in e for e in output['errors']])) + + output = requests.get(BASE_URL, { + 'query': 'query', + 'page': 'whatever' + }).json() + self.assertEquals(len(output['errors']), 1) + self.assertTrue('\'whatever\'' in output['errors'][0]) + + output = requests.get(BASE_URL, { + 'query': 'query', + 'page': '10000' + 
}).json() + self.assertEquals(len(output['errors']), 1) + self.assertTrue('\'10000\'' in output['errors'][0]) + + output = requests.get( + BASE_URL, { + 'query': 'query', + 'illegal': 'whatever', + 'filter': 'fi1:e,types:F,f3,field2:44', + 'another': 3, + 'page': 'third' + }).json() + self.assertEquals(len(output['errors']), 6) + self.assertTrue(any(['\'illegal\'' in e for e in output['errors']])) + self.assertTrue(any(['\'another\'' in e for e in output['errors']])) + self.assertTrue(any(['\'fi1\'' in e for e in output['errors']])) + self.assertTrue(any(['\'field2\'' in e for e in output['errors']])) + self.assertTrue(any(['\'f3\'' in e for e in output['errors']])) + self.assertTrue(any(['\'third\'' in e for e in output['errors']])) + + output = requests.get(BASE_URL + '/https://ror.org/0qwerty89').json() + self.assertEquals(len(output['errors']), 1) + self.assertTrue('\'https://ror.org/0qwerty89\'' in output['errors'][0]) diff --git a/rorapi/tests/tests_integration/tests_search_v1.py b/rorapi/tests/tests_integration/tests_search_v1.py index 505bd66..dca9798 100644 --- a/rorapi/tests/tests_integration/tests_search_v1.py +++ b/rorapi/tests/tests_integration/tests_search_v1.py @@ -117,7 +117,7 @@ def test_typos(self): 'query': 'julius~ maximilian~ universitat~ wuerzburg~' }).json() self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') + self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') def test_extra_word(self): items = requests.get( diff --git a/rorapi/tests/tests_integration/tests_search_v2.py b/rorapi/tests/tests_integration/tests_search_v2.py index e002aed..c6eccc4 100644 --- a/rorapi/tests/tests_integration/tests_search_v2.py +++ b/rorapi/tests/tests_integration/tests_search_v2.py @@ -117,7 +117,7 @@ def test_typos(self): 'query': 'julius~ maximilian~ universitat~ wuerzburg~' }).json() self.assertTrue(items['number_of_results'] > 0) - 
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') + self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') def test_extra_word(self): items = requests.get( diff --git a/rorapi/tests/tests_integration/tests_v2.py b/rorapi/tests/tests_integration/tests_v2.py index bc92fb6..705488c 100644 --- a/rorapi/tests/tests_integration/tests_v2.py +++ b/rorapi/tests/tests_integration/tests_v2.py @@ -192,8 +192,6 @@ def verify_filtering(self, query): self.assertTrue( any([co_aggr == co for co in output['meta']['continents']])) - - for t_aggr, c_aggr, s_aggr, co_aggr in itertools.product(t_aggrs, c_aggrs, s_aggrs, co_aggrs): filter_string = 'locations.geonames_details.country_code:{},types:{},status:{},locations.geonames_details.continent_code:{}' \ .format(c_aggr['id'].upper(), t_aggr['title'], s_aggr['title'], co_aggr['id'].upper()) diff --git a/rorapi/v1/models.py b/rorapi/v1/models.py index 0e84f03..18fb9a1 100644 --- a/rorapi/v1/models.py +++ b/rorapi/v1/models.py @@ -1,14 +1,6 @@ from geonamescache.mappers import country from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity -class ContinentBucket: - """A model class for country aggregation bucket""" - - def __init__(self, data): - self.id = data.key.lower() - self.title = data.key - self.count = data.doc_count - class Aggregations: """Aggregations model class""" From 6032b127f6e785ef8d57539608f2c8eee70c6ef4 Mon Sep 17 00:00:00 2001 From: lizkrznarich Date: Thu, 21 Nov 2024 16:48:01 -0600 Subject: [PATCH 7/7] remove sort order from queries --- rorapi/common/queries.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rorapi/common/queries.py b/rorapi/common/queries.py index ba75ac2..2de6aa7 100644 --- a/rorapi/common/queries.py +++ b/rorapi/common/queries.py @@ -359,7 +359,6 @@ def build_search_query(params, version): sort_field = params.get("sort", "id") sort_order = params.get("order", "asc") - qb.add_sort(sort_field, sort_order) 
qb.paginate(int(params.get("page", 1))) return qb.get_query()