Skip to content

Commit

Permalink
Merge pull request #410 from ror-community/schema-v2-1
Browse files Browse the repository at this point in the history
Schema v2.1 implementation
  • Loading branch information
lizkrznarich authored Dec 3, 2024
2 parents 3e03b8c + 6032b12 commit 27d5352
Show file tree
Hide file tree
Showing 30 changed files with 1,096 additions and 161 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ boto3
pandas==1.4.1
numpy==1.22
titlecase==2.3
update_address @ git+https://github.com/ror-community/update_address.git@v2-locations
update_address @ git+https://github.com/ror-community/update_address.git@v2-1-locations
launchdarkly-server-sdk==7.6.1
jsonschema==3.2.0
python-magic
Expand Down
2 changes: 1 addition & 1 deletion rorapi/common/create_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from rorapi.management.commands.generaterorid import check_ror_id

V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/master/ror_schema_v2_0.json")
V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/schema-v2-1/ror_schema_v2_1.json")


def update_record(json_input, existing_record):
Expand Down
7 changes: 5 additions & 2 deletions rorapi/common/csv_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def process_csv(csv_file, version, validate_only):
success_msg = None
error = None
report = []
report_fields = ['row', 'ror_id', 'action', 'errors']
report_fields = ['row', 'issue_url', 'ror_id', 'action', 'errors']
skipped_count = 0
updated_count = 0
new_count = 0
Expand All @@ -57,10 +57,13 @@ def process_csv(csv_file, version, validate_only):
reader = csv.DictReader(io.StringIO(read_file))
row_num = 2
for row in reader:
html_url = None
ror_id = None
updated = False
print("Row data")
print(row)
if row['html_url']:
html_url = row['html_url']
if row['id']:
ror_id = row['id']
updated = True
Expand All @@ -86,7 +89,7 @@ def process_csv(csv_file, version, validate_only):
skipped_count += 1
if validate_only and action == 'created':
ror_id = None
report.append({"row": row_num, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
report.append({"row": row_num, "html_url": html_url, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
row_num += 1
if new_count > 0 or updated_count > 0 or skipped_count > 0:
try:
Expand Down
9 changes: 0 additions & 9 deletions rorapi/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,6 @@ def __init__(self, data):
self.count = data.doc_count


class Aggregations:
    """Wraps Elasticsearch aggregation results as bucket-model lists.

    Builds typed bucket lists (types, countries, statuses) from the
    corresponding aggregation buckets on the supplied ES response object.
    """

    def __init__(self, data):
        # Each attribute mirrors one named aggregation on the ES response.
        self.types = [TypeBucket(bucket) for bucket in data.types.buckets]
        self.countries = [CountryBucket(bucket) for bucket in data.countries.buckets]
        self.statuses = [StatusBucket(bucket) for bucket in data.statuses.buckets]


class Errors:
"""Errors model class"""

Expand Down
23 changes: 0 additions & 23 deletions rorapi/common/parsers.py

This file was deleted.

77 changes: 49 additions & 28 deletions rorapi/common/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

from urllib.parse import unquote

ALLOWED_FILTERS = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status")
ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status")
ALLOWED_FILTERS_V2 = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status", "locations.geonames_details.continent_code", "locations.geonames_details.continent_name")
ALLOWED_PARAM_KEYS = ("query", "page", "filter", "query.advanced", "all_status")
ALLOWED_ALL_STATUS_VALUES = ("", "true", "false")
# includes deprecated ext id types
Expand Down Expand Up @@ -99,11 +100,15 @@
"links.type",
"links.value",
"locations.geonames_id",
"locations.geonames_details.name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.continent_code",
"locations.geonames_details.continent_name",
"locations.geonames_details.country_code",
"locations.geonames_details.country_name",
"locations.geonames_details.country_subdivision_code",
"locations.geonames_details.country_subdivision_name",
"locations.geonames_details.lat",
"locations.geonames_details.lng",
"locations.geonames_details.name",
"names.lang",
"names.types",
"names.value",
Expand Down Expand Up @@ -150,34 +155,46 @@ def check_status_adv_q(adv_q_string):
status_in_q = True
return status_in_q

def get_country_name_filters(country_name_field, filter_string, version):
    """Extract country-name filter terms from a raw filter string.

    Country names may themselves contain commas (the filter separator),
    e.g. ``country.country_name:Bonaire, Sint Eustatius and Saba,types:Company``,
    so they cannot be split naively on ",". This scans `filter_string` for
    values following `country_name_field` and trims trailing fragments that
    actually belong to the next filter.

    :param country_name_field: filter key to search for
        (e.g. "country.country_name" or "locations.geonames_details.country_name")
    :param filter_string: the raw, comma-separated filter query string
    :param version: API version; "v1" selects ALLOWED_FILTERS_V1,
        anything else selects ALLOWED_FILTERS_V2
    :return: list of "field:value" strings, one per matched country-name filter
    """
    country_name_filters = []
    if version == "v1":
        allowed_filters = ALLOWED_FILTERS_V1
    else:
        allowed_filters = ALLOWED_FILTERS_V2
    # Capture everything after "field:" up to the next colon; because the
    # value may contain commas, the capture can overrun into the next filter
    # and is trimmed below.
    search = re.findall(country_name_field + ":([^:]*)", filter_string)
    if search:
        for s in search:
            # More than one comma implies the capture swallowed the start of
            # the next filter ("Name, Suffix,nextkey") — drop the last segment.
            # NOTE(review): a value with exactly one comma and a following
            # filter key relies solely on the allowed-filter trim below.
            if len(re.findall(",", s)) > 1:
                s = s.rsplit(",", 1)[0]
            # Strip any trailing ",<allowed filter key>" fragment from the value.
            for allowed_filter in allowed_filters:
                if allowed_filter in s:
                    s = s.rsplit("," + allowed_filter, 1)[0]
            country_name_filter = country_name_field + ":" + s
            # NOTE(review): this reassignment only rebinds the local string;
            # the cleaned filter_string is never returned, so the caller's
            # copy still contains the country-name filters — confirm the
            # caller strips them itself before splitting on ",".
            filter_string = filter_string.replace(country_name_filter, "")
            country_name_filters.append(country_name_filter)
    return country_name_filters


def filter_string_to_list(filter_string, version):
filter_list = []
if "country.country_code" in filter_string and version == "v2":
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if version == "v2":
if "country.country_code" in filter_string:
filter_string = filter_string.replace(
"country.country_code", "locations.geonames_details.country_code"
)
if "country.country_name" in filter_string:
filter_string = filter_string.replace(
"country.country_name", "locations.geonames_details.country_name"
)
# some country names contain comma chars
# allow comma chars in country_name filter values only
# country.country_name:Germany,types:Company
if "country.country_name" in filter_string:
country_name_filters = []
search = re.findall("country.country_name:([^:]*)", filter_string)
if search:
for s in search:
if len(re.findall(",", s)) > 1:
s = s.rsplit(",", 1)[0]
for allowed_filter in ALLOWED_FILTERS:
if allowed_filter in s:
s = s.rsplit("," + allowed_filter, 1)[0]
country_name_filter = "country.country_name:" + s
v2_country_name_filter = "locations.geonames_details.country_name:" + s
filter_string = filter_string.replace(country_name_filter, "")
if version == "v2":
country_name_filters.append(v2_country_name_filter)
else:
country_name_filters.append(country_name_filter)

if version == "v1":
country_name_field = "country.country_name"
else:
country_name_field = "locations.geonames_details.country_code"
if country_name_field in filter_string:
country_name_filters = get_country_name_filters(country_name_field, filter_string, version)
filter_list = [f for f in filter_string.split(",") if f]
filter_list = filter_list + country_name_filters
else:
Expand Down Expand Up @@ -240,7 +257,11 @@ def validate(params, version):

valid_filters = [f for f in filters if ":" in f]
filter_keys = [f.split(":")[0] for f in valid_filters]
illegal_keys = [v for v in filter_keys if v not in ALLOWED_FILTERS]
if version == "v1":
allowed_filters = ALLOWED_FILTERS_V1
else:
allowed_filters = ALLOWED_FILTERS_V2
illegal_keys = [v for v in filter_keys if v not in allowed_filters]
errors.extend(["filter key '{}' is illegal".format(k) for k in illegal_keys])

if "page" in params:
Expand Down Expand Up @@ -323,6 +344,7 @@ def build_search_query(params, version):
[
("types", "types"),
("countries", "locations.geonames_details.country_code"),
("continents", "locations.geonames_details.continent_code"),
("statuses", "status"),
]
)
Expand All @@ -337,7 +359,6 @@ def build_search_query(params, version):

sort_field = params.get("sort", "id")
sort_order = params.get("order", "asc")
qb.add_sort(sort_field, sort_order)

qb.paginate(int(params.get("page", 1)))
return qb.get_query()
Expand Down
6 changes: 0 additions & 6 deletions rorapi/common/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,5 @@ class BucketSerializer(serializers.Serializer):
count = serializers.IntegerField()


class AggregationsSerializer(serializers.Serializer):
    """Serializes an Aggregations model: bucket lists for the faceted
    search response (organization types, countries, statuses)."""

    # Each facet is a list of {id/title, count}-style buckets.
    types = BucketSerializer(many=True)
    countries = BucketSerializer(many=True)
    statuses = BucketSerializer(many=True)


class ErrorsSerializer(serializers.Serializer):
errors = serializers.StringRelatedField(many=True)
6 changes: 3 additions & 3 deletions rorapi/tests/tests_integration/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ def test_retrieval(self):

def test_query_grid_retrieval(self):
for test_org in requests.get(BASE_URL).json()['items']:
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)
grid = test_org['external_ids']['GRID']['preferred']
output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json()
self.verify_single_item(output, test_org)

def test_error(self):
output = requests.get(BASE_URL, {
Expand Down
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(
Expand Down
2 changes: 1 addition & 1 deletion rorapi/tests/tests_integration/tests_search_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_typos(self):
'query': 'julius~ maximilian~ universitat~ wuerzburg~'
}).json()
self.assertTrue(items['number_of_results'] > 0)
self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24')
self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57')

def test_extra_word(self):
items = requests.get(
Expand Down
Loading

0 comments on commit 27d5352

Please sign in to comment.