Skip to content

Commit

Permalink
Use both location name and code for individual upload
Browse files Browse the repository at this point in the history
To minimize ambiguous location issues
  • Loading branch information
weilu committed Nov 25, 2024
1 parent 263b4ba commit 6f83281
Show file tree
Hide file tree
Showing 13 changed files with 83 additions and 57 deletions.
2 changes: 1 addition & 1 deletion individual/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
'json_ext.educated_level'
],
"individual_base_fields": [
'first_name', 'last_name', 'dob', 'location_name', 'id'
'first_name', 'last_name', 'dob', 'location_name', 'location_code', 'id'
]
}

Expand Down
3 changes: 2 additions & 1 deletion individual/management/commands/fake_individuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def generate_fake_individual(group_code, recipient_info, individual_role, locati
"number_of_elderly": fake.random_int(min=0, max=5),
"number_of_children": fake.random_int(min=0, max=10),
"beneficiary_data_source": fake.company(),
"location_name": location.name if location else ""
"location_name": location.name if location else "",
"location_code": location.code if location else "",
}


Expand Down
63 changes: 37 additions & 26 deletions individual/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,9 +584,9 @@ def process_chunk(
chunk,
properties,
unique_validations,
loc_name_dist_ids_from_db,
loc_name_code_district_ids_from_db,
user_allowed_loc_ids,
duplicate_village_names,
duplicate_village_name_code_tuples,
):
validated_dataframe = []
check_location = 'location_name' in chunk.columns
Expand All @@ -607,9 +607,10 @@ def process_chunk(
field_validation['validations']['location_name'] = (
IndividualImportService._validate_location(
row.location_name,
loc_name_dist_ids_from_db,
row.location_code,
loc_name_code_district_ids_from_db,
user_allowed_loc_ids,
duplicate_village_names,
duplicate_village_name_code_tuples,
)
)

Expand All @@ -632,62 +633,72 @@ def _validate_possible_individuals(self, dataframe: DataFrame, upload_id: uuid):
check_location = 'location_name' in dataframe.columns
if check_location:
# Issue a single DB query instead of per row for efficiency
loc_name_dist_ids_from_db = self._query_location_district_ids(dataframe.location_name)
loc_name_code_district_ids_from_db = self._query_location_district_ids(dataframe)
user_allowed_loc_ids = LocationManager().get_allowed_ids(self.user)
duplicate_village_names = self._query_dupcliate_village_names()
duplicate_village_name_code_tuples = self._query_duplicate_village_name_code()
else:
loc_name_dist_ids_from_db = None
loc_name_code_district_ids_from_db = None
user_allowed_loc_ids = None
duplicate_village_names = None
duplicate_village_name_code_tuples = None

# TODO: Use ProcessPoolExecutor after resolving django dependency loading issue
validated_dataframe = IndividualImportService.process_chunk(
dataframe,
properties,
unique_validations,
loc_name_dist_ids_from_db,
loc_name_code_district_ids_from_db,
user_allowed_loc_ids,
duplicate_village_names,
duplicate_village_name_code_tuples,
)

self.save_validation_error_in_data_source_bulk(validated_dataframe)
invalid_items = fetch_summary_of_broken_items(upload_id)
return validated_dataframe, invalid_items

@staticmethod
def _query_location_district_ids(df):
    """Map each (location_name, location_code) pair in the upload to its district id.

    Issues a single DB query for all unique (name, code) pairs in the dataframe
    instead of one query per row, for efficiency.

    Args:
        df: pandas DataFrame with 'location_name' and 'location_code' columns.

    Returns:
        dict mapping (name, code) tuples to the district id of the matching
        village-type Location (loc.parent.parent — village -> ward -> district;
        assumes a two-level parent chain exists for every matched village).
    """
    # Deduplicate first so the OR-query has one clause per distinct pair,
    # not one per row.
    unique_tuples = df[['location_name', 'location_code']].drop_duplicates()
    query = Q()
    for _, row in unique_tuples.iterrows():
        # OR together exact (name, code) matches; both must match the same row.
        query |= Q(name=row['location_name'], code=row['location_code'])
    locations = Location.objects.filter(type="V").filter(query)
    return {(loc.name, loc.code): loc.parent.parent.id for loc in locations}

@staticmethod
def _query_duplicate_village_name_code():
    """Return (name, code) pairs shared by more than one village in the system.

    Such pairs are ambiguous: even with both name and code supplied, the
    upload cannot determine which village is meant, so rows referencing them
    must be rejected by _validate_location.

    Returns:
        QuerySet of (name, code) tuples, one per ambiguous pair.
    """
    return (
        Location.objects
        .filter(type="V")
        # values() before annotate() groups by (name, code) ...
        .values('name', 'code')
        # ... so Count('id') counts villages sharing that exact pair.
        .annotate(name_count=Count('id'))
        .filter(name_count__gt=1)
        .values_list('name', 'code')
    )

@staticmethod
def _validate_location(location_name, loc_name_dist_ids_from_db, user_allowed_loc_ids, duplicate_village_names):
def _validate_location(
location_name,
location_code,
loc_name_code_district_ids_from_db,
user_allowed_loc_ids,
duplicate_village_name_code_tuples
):
result = {
'field_name': 'location_name',
}
if location_name is None or location_name == "":
if (location_name is None or location_name == "") and (location_code is None or location_code == ""):
result['success'] = True
elif loc_name_dist_ids_from_db is None and user_allowed_loc_ids is None:
elif loc_name_code_district_ids_from_db is None and user_allowed_loc_ids is None:
result['success'] = True
elif location_name not in loc_name_dist_ids_from_db:
elif (location_name, location_code) not in loc_name_code_district_ids_from_db:
result['success'] = False
result['note'] = f"'location_name' value '{location_name}' is not a valid location name. Please check the spelling against the list of locations in the system."
elif location_name in duplicate_village_names:
result['note'] = f"Location with name '{location_name}' and code '{location_code}' is not valid. Please check the spelling against the list of locations in the system."
elif (location_name, location_code) in duplicate_village_name_code_tuples:
result['success'] = False
result['note'] = f"'location_name' value '{location_name}' is ambiguous, because there are more than one location with this name found in the system."
elif loc_name_dist_ids_from_db[location_name] not in user_allowed_loc_ids:
result['note'] = f"Location with name '{location_name}' and code '{location_code}' is ambiguous, because there are more than one location with this name and code found in the system."
elif loc_name_code_district_ids_from_db[(location_name, location_code)] not in user_allowed_loc_ids:
result['success'] = False
result['note'] = f"'location_name' value '{location_name}' is outside the current user's location permissions."
result['note'] = f"Location with name '{location_name}' and code '{location_code}' is outside the current user's location permissions."
else:
result['success'] = True
return result
Expand Down
15 changes: 7 additions & 8 deletions individual/tests/fixtures/individual_upload.csv
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
first_name,last_name,dob,location_name,individual_role,recipient_info,group_code
Mia,Foo,1989-01-01,Washington DC,HEAD,1,AA
Bob,Foo,1988-01-01,Washington DC,SPOUSE,1,AA
Eric,Foo,2020-01-01,Washington DC,SON,1,AA
Ann,Foo,2020-01-01,Washington D.C.,DAUGHTER,1,AA
Michelle,Bar,1990-01-01,Fairfax,HEAD,0,BB
Frank,Baz,1998-01-01,,HEAD,0,CC

first_name,last_name,dob,location_name,location_code,individual_role,recipient_info,group_code
Mia,Foo,1989-01-01,Washington DC,202,HEAD,1,AA
Bob,Foo,1988-01-01,Washington DC,202,SPOUSE,1,AA
Eric,Foo,2020-01-01,Washington DC,202,SON,1,AA
Ann,Foo,2020-01-01,Washington D.C.,202,DAUGHTER,1,AA
Michelle,Bar,1990-01-01,Fairfax,703,HEAD,0,BB
Frank,Baz,1998-01-01,,,HEAD,0,CC
25 changes: 14 additions & 11 deletions individual/tests/individual_import_service_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from unittest.mock import MagicMock, patch
from core.utils import filter_validity
from core.models.user import Role
from location.models import Location


def count_csv_records(file_path):
Expand Down Expand Up @@ -53,11 +54,11 @@ def setUpClass(cls):
def setUpTestData(cls):
cls.village_a = create_test_village({
'name': 'Washington DC',
'code': 'VsA',
'code': '202',
})
cls.village_b = create_test_village({
'name': 'Fairfax',
'code': 'VsB'
'code': '703'
})


Expand Down Expand Up @@ -247,7 +248,7 @@ def test_validate_import_individuals_row_level_security(self, mock_fetch_summary
self.assertEqual(loc_validation.get('field_name'), 'location_name')
self.assertEqual(
loc_validation.get('note'),
f"'location_name' value '{self.village_b.name}' is outside the current user's location permissions."
f"Location with name '{self.village_b.name}' and code '{self.village_b.code}' is outside the current user's location permissions."
)

# Unknown individual location
Expand All @@ -259,18 +260,20 @@ def test_validate_import_individuals_row_level_security(self, mock_fetch_summary
self.assertEqual(loc_validation.get('field_name'), 'location_name')
self.assertEqual(
loc_validation.get('note'),
"'location_name' value 'Washington D.C.' is not a valid location name. "
"Location with name 'Washington D.C.' and code '202' is not valid. "
"Please check the spelling against the list of locations in the system."
)


@patch('individual.services.load_dataframe')
@patch('individual.services.fetch_summary_of_broken_items')
def test_validate_import_individuals_ambiguous_location_name(self, mock_fetch_summary, mock_load_dataframe):
# set up another location also named Fairfax and save to DB
loc_dup_name = create_test_village({
# set up another location with the same named and code in DB
loc_dup = Location.objects.create(**{
'name': 'Fairfax',
'code': 'VsB2',
'code': '703',
'type': 'V',
'parent': self.village_b.parent,
})

dataframe = pd.read_csv(self.csv_file_path, na_filter=False)
Expand All @@ -296,14 +299,14 @@ def test_validate_import_individuals_ambiguous_location_name(self, mock_fetch_su
# Check that the validation flagged lack of permission and unrecognized locations
validated_rows = result['data']

rows_ambiguous_loc = [row for row in validated_rows if row['row']['location_name'] == loc_dup_name.name]
self.assertTrue(rows_ambiguous_loc, f'Expected at least one row with location_name={loc_dup_name.name}')
rows_ambiguous_loc = [row for row in validated_rows if row['row']['location_name'] == loc_dup.name]
self.assertTrue(rows_ambiguous_loc, f'Expected at least one row with location_name={loc_dup.name}')
for row in rows_ambiguous_loc:
loc_validation = row['validations']['location_name']
self.assertFalse(loc_validation.get('success'))
self.assertEqual(loc_validation.get('field_name'), 'location_name')
self.assertEqual(
loc_validation.get('note'),
"'location_name' value 'Fairfax' is ambiguous, "
"because there are more than one location with this name found in the system."
"Location with name 'Fairfax' and code '703' is ambiguous, "
"because there are more than one location with this name and code found in the system."
)
2 changes: 1 addition & 1 deletion individual/tests/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_download_template_file(self):
response['Content-Disposition']
)

expected_base_csv_header = f'first_name,last_name,dob,location_name,id'
expected_base_csv_header = f'first_name,last_name,dob,location_name,location_code,id'
content = b"".join(response.streaming_content).decode('utf-8')
self.assertTrue(
expected_base_csv_header in content,
Expand Down
12 changes: 8 additions & 4 deletions individual/tests/test_workflows_individual_update.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.db import connection
from django.test import TestCase
from core.test_helpers import create_test_interactive_user
from individual.models import (
Expand Down Expand Up @@ -80,6 +81,7 @@ def setUp(self):
"last_name": "Doe",
"dob": "1980-01-01",
"location_name": self.village.name,
"location_code": self.village.code,
}
)
self.valid_data_source.save(user=self.user)
Expand All @@ -96,8 +98,8 @@ def setUp(self):

@patch('individual.apps.IndividualConfig.enable_maker_checker_for_individual_update', False)
@skipIf(
connection.vendor != "postgresql",
"Skipping tests due to implementation usage of validate_json_schema, which is a postgres specific extension."
connection.vendor != "postgresql",
"Skipping tests due to implementation usage of validate_json_schema, which is a postgres specific extension."
)
def test_process_update_individuals_workflow_successful_execution(self):
process_update_individuals_workflow(self.user_uuid, self.upload_uuid)
Expand Down Expand Up @@ -130,15 +132,16 @@ def test_process_update_individuals_workflow_successful_execution(self):

@patch('individual.apps.IndividualConfig.enable_maker_checker_for_individual_update', False)
@skipIf(
connection.vendor != "postgresql",
"Skipping tests due to implementation usage of validate_json_schema, which is a postgres specific extension."
connection.vendor != "postgresql",
"Skipping tests due to implementation usage of validate_json_schema, which is a postgres specific extension."
)
def test_process_update_individuals_workflow_with_all_valid_entries(self):
# Update invalid entry in IndividualDataSource to valid data
self.invalid_data_source.json_ext = {
"ID": str(self.individual2.id),
"first_name": self.individual2_updated_first_name,
"location_name": None,
"location_code": None,
}
self.invalid_data_source.save(user=self.user)

Expand Down Expand Up @@ -170,6 +173,7 @@ def test_process_update_individuals_workflow_with_maker_checker_enabled(self):
"ID": str(self.individual2.id),
"first_name": "Jane",
"location_name": None,
"location_code": None,
}
self.invalid_data_source.save(user=self.user)

Expand Down
2 changes: 2 additions & 0 deletions individual/tests/test_workflows_individual_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def setUp(self):
"last_name": "Doe",
"dob": "1980-01-01",
"location_name": self.village.name,
"location_code": self.village.code,
}
)
self.valid_data_source.save(user=self.user)
Expand Down Expand Up @@ -117,6 +118,7 @@ def test_process_import_individuals_workflow_with_all_valid_entries(self):
"last_name": "Doe",
"dob": "1982-01-01",
"location_name": None,
"location_code": None,
}
self.invalid_data_source.save(user=self.user)

Expand Down
8 changes: 4 additions & 4 deletions individual/tests/test_workflows_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ def tearDown(self):
patch.stopall()

def test_validate_dataframe_headers_valid(self):
self.executor.df = pd.DataFrame(columns=['first_name', 'last_name', 'dob', 'id', 'location_name'])
self.executor.df = pd.DataFrame(columns=['first_name', 'last_name', 'dob', 'id', 'location_name', 'location_code'])
try:
self.executor.validate_dataframe_headers()
except PythonWorkflowHandlerException:
self.fail("validate_dataframe_headers() raised PythonWorkflowHandlerException unexpectedly!")

def test_validate_dataframe_headers_missing_required_fields(self):
# DataFrame missing required 'first_name' column
self.executor.df = pd.DataFrame(columns=['last_name', 'dob', 'id', 'location_name'])
self.executor.df = pd.DataFrame(columns=['last_name', 'dob', 'id', 'location_name', 'location_code'])

with self.assertRaises(PythonWorkflowHandlerException) as cm:
self.executor.validate_dataframe_headers()
Expand All @@ -49,7 +49,7 @@ def test_validate_dataframe_headers_missing_required_fields(self):

def test_validate_dataframe_headers_invalid_headers(self):
# DataFrame with an invalid column
self.executor.df = pd.DataFrame(columns=['first_name', 'last_name', 'dob', 'id', 'location_name', 'unexpected_column'])
self.executor.df = pd.DataFrame(columns=['first_name', 'last_name', 'dob', 'id', 'location_name', 'location_code', 'unexpected_column'])

with self.assertRaises(PythonWorkflowHandlerException) as cm:
self.executor.validate_dataframe_headers()
Expand All @@ -58,7 +58,7 @@ def test_validate_dataframe_headers_invalid_headers(self):

def test_validate_dataframe_headers_update_missing_id(self):
# DataFrame missing 'ID' when is_update=True
columns = ['first_name', 'last_name', 'dob', 'id', 'location_name']
columns = ['first_name', 'last_name', 'dob', 'id', 'location_name', 'location_code']
self.executor.df = pd.DataFrame(columns=columns)

with self.assertRaises(PythonWorkflowHandlerException) as cm:
Expand Down
1 change: 1 addition & 0 deletions individual/workflows/base_individual_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def process_update_individuals_workflow(user_uuid, upload_uuid):
FROM individual_individualdatasource f
LEFT JOIN "tblLocations" AS loc
ON loc."LocationName" = f."Json_ext"->>'location_name'
AND loc."LocationCode" = f."Json_ext"->>'location_code'
AND loc."LocationType"='V'
WHERE individual_individual."UUID" = (f."Json_ext" ->> 'ID')::UUID
returning individual_individual."UUID", f."UUID" as "individualdatasource_id")
Expand Down
1 change: 1 addition & 0 deletions individual/workflows/base_individual_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def process_import_individuals_workflow(user_uuid, upload_uuid):
FROM individual_individualdatasource AS ds
LEFT JOIN "tblLocations" AS loc
ON loc."LocationName" = ds."Json_ext"->>'location_name'
AND loc."LocationCode" = ds."Json_ext"->>'location_code'
AND loc."LocationType"='V'
WHERE ds.upload_id=current_upload_id
AND ds.individual_id is null
Expand Down
2 changes: 2 additions & 0 deletions individual/workflows/individual_update_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def process_update_valid_individuals_workflow(user_uuid, upload_uuid, accepted=N
FROM individual_individualdatasource ids
LEFT JOIN "tblLocations" AS loc
ON loc."LocationName" = ids."Json_ext"->>'location_name'
AND loc."LocationCode" = ids."Json_ext"->>'location_code'
AND loc."LocationType"='V'
WHERE individual_individual."UUID" = (ids."Json_ext" ->> 'ID')::UUID
AND ids.upload_id = current_upload_id
Expand Down Expand Up @@ -212,6 +213,7 @@ def process_update_valid_individuals_workflow(user_uuid, upload_uuid, accepted=N
FROM individual_individualdatasource ids
LEFT JOIN "tblLocations" AS loc
ON loc."LocationName" = ids."Json_ext"->>'location_name'
AND loc."LocationCode" = ids."Json_ext"->>'location_code'
AND loc."LocationType"='V'
WHERE individual_individual."UUID" = (ids."Json_ext" ->> 'ID')::UUID
AND ids.upload_id = current_upload_id
Expand Down
Loading

0 comments on commit 6f83281

Please sign in to comment.