From 07d5a70b5a4a2cf3591d634ecbc093a1c9d5c96b Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Wed, 19 Jun 2024 12:58:38 -0400 Subject: [PATCH 01/11] Remove old migration --- .../commands/populate_sql_user_data.py | 49 ------------------- .../migrations/0057_populate_sql_user_data.py | 11 ++--- corehq/apps/users/tests/test_user_data.py | 48 ------------------ 3 files changed, 3 insertions(+), 105 deletions(-) delete mode 100644 corehq/apps/users/management/commands/populate_sql_user_data.py diff --git a/corehq/apps/users/management/commands/populate_sql_user_data.py b/corehq/apps/users/management/commands/populate_sql_user_data.py deleted file mode 100644 index 167cd3329159..000000000000 --- a/corehq/apps/users/management/commands/populate_sql_user_data.py +++ /dev/null @@ -1,49 +0,0 @@ -# One-off migration, March 2024 - -from django.contrib.auth.models import User -from django.core.management.base import BaseCommand - -from dimagi.utils.chunked import chunked - -from corehq.apps.case_search.const import COMMCARE_PROJECT -from corehq.apps.custom_data_fields.models import PROFILE_SLUG -from corehq.apps.users.dbaccessors import get_user_docs_by_username -from corehq.apps.users.user_data import SQLUserData -from corehq.util.log import with_progress_bar -from corehq.util.queries import queryset_to_iterator - - -class Command(BaseCommand): - help = "Populate SQL user data from couch" - - def handle(self, **options): - queryset = get_users_without_user_data() - users = with_progress_bar(queryset_to_iterator(queryset, User), queryset.count()) - for chunk in chunked(users, 100): - users_by_username = {user.username: user for user in chunk} - for user_doc in get_user_docs_by_username(users_by_username.keys()): - populate_user_data(user_doc, users_by_username[user_doc['username']]) - - -def get_users_without_user_data(): - return User.objects.filter(sqluserdata__isnull=True) - - -def populate_user_data(user_doc, django_user): - if user_doc['doc_type'] == 'WebUser': - domains = user_doc['domains'] - else: - domains = [user_doc['domain']] - - for domain in domains: - raw_user_data = user_doc.get('user_data', {}) - raw_user_data.pop(COMMCARE_PROJECT, None) - profile_id = raw_user_data.pop(PROFILE_SLUG, None) - if raw_user_data or profile_id: - SQLUserData.objects.create( - user_id=user_doc['_id'], - domain=domain, - data=raw_user_data, - django_user=django_user, - profile_id=profile_id, - ) diff --git a/corehq/apps/users/migrations/0057_populate_sql_user_data.py b/corehq/apps/users/migrations/0057_populate_sql_user_data.py index 1e90b930f4a5..32cb9f068588 100644 --- a/corehq/apps/users/migrations/0057_populate_sql_user_data.py +++ b/corehq/apps/users/migrations/0057_populate_sql_user_data.py @@ -1,12 +1,6 @@ -from django.core.management import call_command from django.db import migrations -from corehq.util.django_migrations import skip_on_fresh_install - - -@skip_on_fresh_install -def populate_sql_user_data(apps, schema_editor): - call_command('populate_sql_user_data') +from corehq.util.django_migrations import prompt_for_historical_migration, get_migration_name class Migration(migrations.Migration): @@ -16,5 +10,6 @@ class Migration(migrations.Migration): ] operations = [ - migrations.RunPython(populate_sql_user_data, migrations.RunPython.noop) + prompt_for_historical_migration( + "users", get_migration_name(__file__), "1359e6ed5b12d929d97f5a59f0f0181a3811ccdc") ] diff --git a/corehq/apps/users/tests/test_user_data.py b/corehq/apps/users/tests/test_user_data.py index bc4ae9cbabfc..09ef75364b29 100644 --- a/corehq/apps/users/tests/test_user_data.py +++ b/corehq/apps/users/tests/test_user_data.py @@ -6,10 +6,6 @@ from corehq.apps.custom_data_fields.models import CustomDataFieldsProfile, Field, CustomDataFieldsDefinition from corehq.apps.users.views.mobile.custom_data_fields import CUSTOM_USER_DATA_FIELD_TYPE from corehq.apps.users.dbaccessors import delete_all_users -from corehq.apps.users.management.commands.populate_sql_user_data import ( - get_users_without_user_data, - populate_user_data, -) from corehq.apps.users.models import CommCareUser, WebUser from corehq.apps.users.user_data import ( SQLUserData, @@ -72,50 +68,6 @@ def test_web_users(self): 'what_domain_is_it': 'domain 2', }) - def test_migrate_get_users_without_user_data(self): - users_without_data = [ - self.make_commcare_user(), - self.make_commcare_user(), - self.make_web_user(), - self.make_web_user(), - ] - users_with_data = [self.make_commcare_user(), self.make_web_user()] - for user in users_with_data: - ud = user.get_user_data(self.domain) - ud['key'] = 'dummy val so this is non-empty' - ud.save() - - users_to_migrate = get_users_without_user_data() - self.assertItemsEqual( - [u.username for u in users_without_data], - [u.username for u in users_to_migrate], - ) - - def test_migrate_commcare_user(self): - user = self.make_commcare_user() - user['user_data'] = {'favorite_color': 'purple'} - user.save() - populate_user_data(CommCareUser.get_db().get(user._id), user.get_django_user()) - sql_data = SQLUserData.objects.get(domain=self.domain, user_id=user.user_id) - self.assertEqual(sql_data.data['favorite_color'], 'purple') - - def test_migrate_web_user(self): - user = self.make_web_user() - # one user data dictionary, gets copied to all domains - user['user_data'] = {'favorite_color': 'purple'} - user.add_domain_membership('domain2', timezone='UTC') - user.save() - populate_user_data(WebUser.get_db().get(user._id), user.get_django_user()) - for domain in [self.domain, 'domain2']: - sql_data = SQLUserData.objects.get(domain=domain, user_id=user.user_id) - self.assertEqual(sql_data.data['favorite_color'], 'purple') - - def test_migrate_user_no_data(self): - user = self.make_commcare_user() - populate_user_data(CommCareUser.get_db().get(user._id), user.get_django_user()) - with self.assertRaises(SQLUserData.DoesNotExist): - SQLUserData.objects.get(domain=self.domain, user_id=user.user_id) - def test_prime_user_data_caches(self): users = [ self.make_commcare_user(), From 160d540a365046bc4e417c57a067ad1e8dade5af Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Thu, 20 Jun 2024 13:15:55 -0400 Subject: [PATCH 02/11] This is no longer a class attribute It is overridden anyways in dehydrate --- corehq/apps/api/resources/v0_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/apps/api/resources/v0_1.py b/corehq/apps/api/resources/v0_1.py index c370a37f0f70..3797f8e049b4 100644 --- a/corehq/apps/api/resources/v0_1.py +++ b/corehq/apps/api/resources/v0_1.py @@ -61,7 +61,7 @@ class Meta(CustomResourceMeta): class CommCareUserResource(UserResource): groups = fields.ListField(attribute='get_group_ids') - user_data = fields.DictField(attribute='user_data') + user_data = fields.DictField() class Meta(UserResource.Meta): authentication = RequirePermissionAuthentication(HqPermissions.edit_commcare_users) From 45137f39b8fc89527c3ab3c89913b0977020125b Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Thu, 20 Jun 2024 15:25:33 -0400 Subject: [PATCH 03/11] Check user data changes against SQL original This was previously checking that user data changed before calling `add_changes`, and then checking again inside `add_changes`, but this time against the couch field, which shouldn't be in-use anymore --- corehq/apps/api/user_updates.py | 2 +- corehq/apps/user_importer/helpers.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/corehq/apps/api/user_updates.py b/corehq/apps/api/user_updates.py index bbb6ad188121..433fc6106f93 100644 --- a/corehq/apps/api/user_updates.py +++ b/corehq/apps/api/user_updates.py @@ -119,7 +119,7 @@ def _update_user_data(user, new_user_data, user_change_logger): if user_change_logger and changed: user_change_logger.add_changes({ 'user_data': user.get_user_data(user.domain).raw - }) + }, skip_confirmation=True) def _update_user_role(user, role, user_change_logger): diff --git a/corehq/apps/user_importer/helpers.py b/corehq/apps/user_importer/helpers.py index c46c533bfb46..8744f8290d40 100644 --- a/corehq/apps/user_importer/helpers.py +++ b/corehq/apps/user_importer/helpers.py @@ -41,17 +41,15 @@ def __init__(self, upload_domain, user_domain, user, is_new_user, changed_by_use if not is_new_user: self.original_user_doc = self.user.to_json() - self.original_user_data = self.user.get_user_data(user_domain).raw else: self.original_user_doc = None - self.original_user_data = None self.fields_changed = {} self.change_messages = {} self._save = False # flag to check if log needs to be saved for updates - def add_changes(self, changes): + def add_changes(self, changes, skip_confirmation=False): """ Add changes to user properties. Ignored for new user since the whole user doc is logged for a new user @@ -60,7 +58,7 @@ def add_changes(self, changes): if self.is_new_user: return for name, new_value in changes.items(): - if self.original_user_doc[name] != new_value: + if skip_confirmation or self.original_user_doc[name] != new_value: self.fields_changed[name] = new_value self._save = True @@ -157,6 +155,8 @@ def update_user_data(self, data, uncategorized_data, profile_name, profiles_by_n from corehq.apps.users.user_data import UserDataError user_data = self.user.get_user_data(self.user_domain) old_profile_id = user_data.profile_id + old_user_data = user_data.raw + if PROFILE_SLUG in data: raise UserUploadError(_("You cannot set {} directly").format(PROFILE_SLUG)) if profile_name: @@ -174,20 +174,16 @@ def update_user_data(self, data, uncategorized_data, profile_name, profiles_by_n if user_data.profile_id and user_data.profile_id != old_profile_id: self.logger.add_info(UserChangeMessage.profile_info(user_data.profile_id, profile_name)) + if old_user_data != user_data.raw: + self.logger.add_changes({'user_data': user_data.raw}, skip_confirmation=True) + def save_log(self): # Tracking for role is done post save to have role setup correctly on save if self.role_updated: new_role = self.user.get_role(domain=self.user_domain) self.logger.add_info(UserChangeMessage.role_change(new_role)) - - self._include_user_data_changes() return self.logger.save() - def _include_user_data_changes(self): - new_user_data = self.user.get_user_data(self.user_domain).raw - if self.logger.original_user_data != new_user_data: - self.logger.add_changes({'user_data': new_user_data}) - class CommCareUserImporter(BaseUserImporter): From 8e813e2e5daad92d7f6bfb54d6e611f8f431f23c Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Fri, 21 Jun 2024 09:30:45 -0400 Subject: [PATCH 04/11] Log user data changes and not just from to_json --- corehq/apps/users/tests/test_log_user_change.py | 4 +++- corehq/apps/users/util.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/corehq/apps/users/tests/test_log_user_change.py b/corehq/apps/users/tests/test_log_user_change.py index 4dcc58f634af..22b4b028a65f 100644 --- a/corehq/apps/users/tests/test_log_user_change.py +++ b/corehq/apps/users/tests/test_log_user_change.py @@ -16,8 +16,10 @@ class TestLogUserChange(TestCase): def setUpClass(cls): cls.project = create_domain(cls.domain) cls.web_user = WebUser.create(cls.domain, 'test@commcarehq.org', '******', + user_data={'favorite_color': 'cyan'}, created_by=None, created_via=None) cls.commcare_user = CommCareUser.create(cls.domain, f'test@{cls.domain}.commcarehq.org', '******', + user_data={'favorite_color': 'purple'}, created_by=cls.web_user, created_via=USER_CHANGE_VIA_WEB) @classmethod @@ -186,7 +188,7 @@ def _get_expected_changes_json(user): 'status': None, 'subscribed_to_commcare_users': False, 'two_factor_auth_disabled_until': None, - 'user_data': {}, + 'user_data': user.get_user_data('test').raw, 'user_location_id': None, 'username': user_json['username'] } diff --git a/corehq/apps/users/util.py b/corehq/apps/users/util.py index 8e74883dbbd2..1b25b35329f6 100644 --- a/corehq/apps/users/util.py +++ b/corehq/apps/users/util.py @@ -398,7 +398,7 @@ def log_user_change(by_domain, for_domain, couch_user, changed_by_user, changed_ changed_by_repr=changed_by_repr, user_id=couch_user.get_id, changed_by=changed_by_id, - changes=_get_changed_details(couch_user, action, fields_changed), + changes=_get_changed_details(couch_user, action, fields_changed, for_domain), changed_via=changed_via, change_messages=change_messages, action=action.value, @@ -406,11 +406,12 @@ def log_user_change(by_domain, for_domain, couch_user, changed_by_user, changed_ ) -def _get_changed_details(couch_user, action, fields_changed): +def _get_changed_details(couch_user, action, fields_changed, for_domain): from corehq.apps.users.model_log import UserModelAction if action in [UserModelAction.CREATE, UserModelAction.DELETE]: changed_details = couch_user.to_json() + changed_details['user_data'] = couch_user.get_user_data(for_domain).raw if for_domain else {} else: changed_details = fields_changed.copy() From 983b0870d29e66a27e0a1acfc69a794941d76e8c Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Fri, 21 Jun 2024 09:44:49 -0400 Subject: [PATCH 05/11] Remove unused user data field The migration will come later --- corehq/apps/users/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/corehq/apps/users/models.py b/corehq/apps/users/models.py index 79bb3158331c..0407fa625f7b 100644 --- a/corehq/apps/users/models.py +++ b/corehq/apps/users/models.py @@ -34,7 +34,6 @@ BooleanProperty, DateProperty, DateTimeProperty, - DictProperty, Document, DocumentSchema, IntegerProperty, @@ -985,7 +984,6 @@ class CouchUser(Document, DjangoUserMixin, IsMemberOfMixin, EulaMixin): language = StringProperty() subscribed_to_commcare_users = BooleanProperty(default=False) announcements_seen = ListProperty() - user_data = DictProperty() # use get_user_data object instead of accessing this directly # This should not be set directly but using set_location method only location_id = StringProperty() assigned_location_ids = StringListProperty() From d04391cea949e205387ed170a6c8ab25a0e134ba Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Fri, 21 Jun 2024 13:02:28 -0400 Subject: [PATCH 06/11] Just use ES results directly The query only returns docs that _don't_ have that value set, then fetches those from couch and attempts to get that value. However, this information is no longer stored in couch anyways. --- corehq/apps/geospatial/views.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/corehq/apps/geospatial/views.py b/corehq/apps/geospatial/views.py index 17275bc9db44..bdfd43314e12 100644 --- a/corehq/apps/geospatial/views.py +++ b/corehq/apps/geospatial/views.py @@ -16,7 +16,6 @@ import jsonschema from memoized import memoized -from dimagi.utils.couch.bulk import get_docs from dimagi.utils.couch.database import iter_docs from dimagi.utils.web import json_request, json_response @@ -367,32 +366,28 @@ def get_paginated_cases_without_gps(self, domain, page, limit): def _get_paginated_users_without_gps(domain, page, limit, query): location_prop_name = get_geo_user_property(domain) - query = ( + res = ( UserES() .domain(domain) .mobile_users() .missing_or_empty_user_data_property(location_prop_name) .search_string_query(query, ['username']) + .fields(['_id', 'username']) .sort('created_on', desc=True) + .start((page - 1) * limit) + .size(limit) + .run() ) - - paginator = Paginator(query.get_ids(), limit) - user_ids_page = list(paginator.get_page(page)) - user_docs = get_docs(CommCareUser.get_db(), keys=user_ids_page) - user_data = [] - for user_doc in user_docs: - lat, lon = get_lat_lon_from_dict(user_doc['user_data'], location_prop_name) - user_data.append( - { - 'id': user_doc['_id'], - 'name': user_doc['username'].split('@')[0], - 'lat': lat, - 'lon': lon, - } - ) return { - 'items': user_data, - 'total': paginator.count, + 'items': [ + { + 'id': hit['_id'], + 'name': hit['username'].split('@')[0], + 'lat': '', + 'lon': '', + } for hit in res.hits + ], + 'total': res.total, } From 50811775005fcb578ea518daab76850ab9ae0847 Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Fri, 21 Jun 2024 16:48:27 -0400 Subject: [PATCH 07/11] Fix related doc lookups for user data These have been broken since the migration to SQL, apparently --- corehq/apps/userreports/expressions/specs.py | 10 ++++++---- .../userreports/tests/test_expressions.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/corehq/apps/userreports/expressions/specs.py b/corehq/apps/userreports/expressions/specs.py index 9bed1ba60a07..33d042560b39 100644 --- a/corehq/apps/userreports/expressions/specs.py +++ b/corehq/apps/userreports/expressions/specs.py @@ -600,19 +600,21 @@ def __call__(self, item, evaluation_context=None): @staticmethod @ucr_context_cache(vary_on=('related_doc_type', 'doc_id',)) def _get_document(related_doc_type, doc_id, evaluation_context): + domain = evaluation_context.root_doc['domain'] + assert domain document_store = get_document_store_for_doc_type( - evaluation_context.root_doc['domain'], related_doc_type, - load_source="related_doc_expression") + domain, related_doc_type, load_source="related_doc_expression") try: doc = document_store.get_document(doc_id) except DocumentNotFoundError: return None - if evaluation_context.root_doc['domain'] != doc.get('domain'): + if domain != doc.get('domain'): return None + if related_doc_type == 'CommCareUser': + doc['user_data'] = CommCareUser.wrap(doc).get_user_data(domain).to_dict() return doc def get_value(self, doc_id, evaluation_context): - assert evaluation_context.root_doc['domain'] doc = self._get_document(self.related_doc_type, doc_id, evaluation_context) # explicitly use a new evaluation context since this is a new document return self._value_expression(doc, EvaluationContext(doc, 0)) diff --git a/corehq/apps/userreports/tests/test_expressions.py b/corehq/apps/userreports/tests/test_expressions.py index db36ddc3c348..0292f57dfefd 100644 --- a/corehq/apps/userreports/tests/test_expressions.py +++ b/corehq/apps/userreports/tests/test_expressions.py @@ -1028,6 +1028,25 @@ def test_other_lookups(self): doc = self._get_doc(user_id) self.assertEqual(user_id, expression(doc, EvaluationContext(doc, 0))) + def test_user_data_lookup(self): + user = CommCareUser.create(self.domain, 'username', "123", None, None, + user_data={'favorite_color': 'indigo'}) + self.addCleanup(user.delete, None, None) + expression = ExpressionFactory.from_spec({ + "type": "related_doc", + "related_doc_type": 'CommCareUser', + "doc_id_expression": { + "type": "property_name", + "property_name": "related_id" + }, + "value_expression": { + "type": "property_path", + "property_path": ["user_data", "favorite_color"], + }, + }) + doc = self._get_doc(user._id) + self.assertEqual('indigo', expression(doc, EvaluationContext(doc, 0))) + @staticmethod def _get_expression(doc_type): return ExpressionFactory.from_spec({ From ca7845f66d8fcf465f59a80eda8fc9b740f4c4ef Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Mon, 24 Jun 2024 14:18:45 -0400 Subject: [PATCH 08/11] Remove db requirement for champ UCR tests --- custom/champ/tests/test_champ_cameroon.py | 2 ++ custom/champ/tests/test_enhanced_peer_mobilization.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/custom/champ/tests/test_champ_cameroon.py b/custom/champ/tests/test_champ_cameroon.py index 7fadbd355c98..78573f4b0826 100644 --- a/custom/champ/tests/test_champ_cameroon.py +++ b/custom/champ/tests/test_champ_cameroon.py @@ -1,12 +1,14 @@ from datetime import date from corehq.apps.userreports.specs import EvaluationContext +from corehq.apps.users.tests.util import patch_user_data_db_layer from custom.champ.tests.utils import TestDataSourceExpressions from custom.champ.utils import POST_TEST_XMLNS, ACCOMPAGNEMENT_XMLNS, SUIVI_MEDICAL_XMLNS CHAMP_CAMEROON_DATA_SOURCE = 'champ_cameroon.json' +@patch_user_data_db_layer() class TestEnhancedPeerMobilization(TestDataSourceExpressions): data_source_name = CHAMP_CAMEROON_DATA_SOURCE diff --git a/custom/champ/tests/test_enhanced_peer_mobilization.py b/custom/champ/tests/test_enhanced_peer_mobilization.py index 59f483d250a2..f3165abb3654 100644 --- a/custom/champ/tests/test_enhanced_peer_mobilization.py +++ b/custom/champ/tests/test_enhanced_peer_mobilization.py @@ -1,11 +1,14 @@ from datetime import date + from corehq.apps.userreports.specs import EvaluationContext +from corehq.apps.users.tests.util import patch_user_data_db_layer from custom.champ.tests.utils import TestDataSourceExpressions from custom.champ.utils import PREVENTION_XMLNS, TARGET_XMLNS ENHANCED_PEER_MOBILIZATION_DATA_SOURCE = 'enhanced_peer_mobilization.json' +@patch_user_data_db_layer() class TestEnhancedPeerMobilization(TestDataSourceExpressions): data_source_name = ENHANCED_PEER_MOBILIZATION_DATA_SOURCE From ab0ed8ad54646831aa61445f379d5d74c152fe9f Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Wed, 19 Jun 2024 15:22:42 -0400 Subject: [PATCH 09/11] Remove old, unused user_data field --- .../management/commands/rm_couch_user_data.py | 31 +++++++++++++++++++ corehq/dbaccessors/couchapps/all_docs.py | 12 +++++++ 2 files changed, 43 insertions(+) create mode 100644 corehq/apps/users/management/commands/rm_couch_user_data.py diff --git a/corehq/apps/users/management/commands/rm_couch_user_data.py b/corehq/apps/users/management/commands/rm_couch_user_data.py new file mode 100644 index 000000000000..78d92e881ac4 --- /dev/null +++ b/corehq/apps/users/management/commands/rm_couch_user_data.py @@ -0,0 +1,31 @@ +# One-off migration, June 2024 + +from itertools import chain + +from django.core.management.base import BaseCommand + +from corehq.apps.users.models import CommCareUser +from corehq.dbaccessors.couchapps.all_docs import ( + get_doc_count_by_type, + iter_all_doc_ids, +) +from corehq.util.couch import DocUpdate, iter_update +from corehq.util.log import with_progress_bar + + +class Command(BaseCommand): + help = "Populate SQL user data from couch" + + def handle(self, **options): + db = CommCareUser.get_db() + count = (get_doc_count_by_type(db, 'WebUser') + + get_doc_count_by_type(db, 'CommCareUser')) + all_ids = chain(iter_all_doc_ids(db, 'WebUser'), + iter_all_doc_ids(db, 'CommCareUser')) + iter_update(db, _update_user, with_progress_bar(all_ids, count), verbose=True) + + +def _update_user(user_doc): + couch_data = user_doc.pop('user_data', ...) + if couch_data is not ...: + return DocUpdate(user_doc) diff --git a/corehq/dbaccessors/couchapps/all_docs.py b/corehq/dbaccessors/couchapps/all_docs.py index 2bea0755d1c6..8dd37d0f7361 100644 --- a/corehq/dbaccessors/couchapps/all_docs.py +++ b/corehq/dbaccessors/couchapps/all_docs.py @@ -37,6 +37,18 @@ def get_doc_count_by_type(db, doc_type): return 0 +def iter_all_doc_ids(db, doc_type): + return (row['id'] for row in paginate_view( + db, + 'all_docs/by_doc_type', + chunk_size=10000, + startkey=[doc_type], + endkey=[doc_type, {}], + include_docs=False, + reduce=False, + )) + + def get_doc_count_by_domain_type(db, domain, doc_type): key = [domain, doc_type] result = db.view( From a181308ec07a3416fc8623105d57e10e31f0f73a Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Mon, 24 Jun 2024 16:06:42 -0400 Subject: [PATCH 10/11] Make body of migration run only once --- corehq/apps/domain_migration_flags/api.py | 23 +++++++++ .../tests/test_domain_migration_progress.py | 48 +++++++++++++++++-- .../management/commands/rm_couch_user_data.py | 21 ++++---- 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/corehq/apps/domain_migration_flags/api.py b/corehq/apps/domain_migration_flags/api.py index 68003132d1c2..3eabdd5633f7 100644 --- a/corehq/apps/domain_migration_flags/api.py +++ b/corehq/apps/domain_migration_flags/api.py @@ -1,6 +1,7 @@ import logging from collections import defaultdict from datetime import datetime +from functools import wraps from django.db.models import Q @@ -126,3 +127,25 @@ def all_domains_with_migrations_in_progress(): def reset_caches(domain, slug): any_migrations_in_progress(domain, strict=True) get_migration_status(domain, slug, strict=True) + + +def once_off_migration(slug): + """Ensure that the body of a migration is run only once + + You can still run it again if there is an exception + """ + def outer(migration_fn): + @wraps(migration_fn) + def inner(*args, **kwargs): + if get_migration_complete(ALL_DOMAINS, slug): + return + set_migration_started(ALL_DOMAINS, slug) + try: + res = migration_fn(*args, **kwargs) + except Exception: + set_migration_not_started(ALL_DOMAINS, slug) + raise + set_migration_complete(ALL_DOMAINS, slug) + return res + return inner + return outer diff --git a/corehq/apps/domain_migration_flags/tests/test_domain_migration_progress.py b/corehq/apps/domain_migration_flags/tests/test_domain_migration_progress.py index 2b1960f117f1..463457d9914e 100644 --- a/corehq/apps/domain_migration_flags/tests/test_domain_migration_progress.py +++ b/corehq/apps/domain_migration_flags/tests/test_domain_migration_progress.py @@ -1,13 +1,16 @@ import uuid from datetime import datetime +from unittest.mock import Mock from django.test import TestCase from corehq.apps.domain_migration_flags.api import ( + ALL_DOMAINS, any_migrations_in_progress, get_migration_complete, get_migration_status, migration_in_progress, + once_off_migration, set_migration_complete, set_migration_not_started, set_migration_started, @@ -20,10 +23,8 @@ class DomainMigrationProgressTest(TestCase): - @classmethod - def setUpClass(cls): - super(DomainMigrationProgressTest, cls).setUpClass() - cls.slug = uuid.uuid4().hex + def setUp(self): + self.slug = uuid.uuid4().hex def get_progress(self, domain): return DomainMigrationProgress.objects.get(domain=domain, migration_slug=self.slug) @@ -109,3 +110,42 @@ def test_any_migrations_in_progress(self): self.assertTrue(any_migrations_in_progress('purple')) set_migration_complete('purple', 'other_slug') self.assertFalse(any_migrations_in_progress('purple')) + + def test_once_off_decorator(self): + actual_migration = Mock() + + @once_off_migration(self.slug) + def basic_migration(): + self.assertEqual(get_migration_status(ALL_DOMAINS, self.slug), MigrationStatus.IN_PROGRESS) + actual_migration() + + self.assertEqual(get_migration_status(ALL_DOMAINS, self.slug), MigrationStatus.NOT_STARTED) + actual_migration.assert_not_called() + + basic_migration() + self.assertEqual(get_migration_status(ALL_DOMAINS, self.slug), MigrationStatus.COMPLETE) + actual_migration.assert_called_once() + + basic_migration() + actual_migration.assert_called_once() + + def test_once_off_decorator_failure(self): + actual_migration = Mock() + + @once_off_migration(self.slug) + def failing_migration(): + self.assertEqual(get_migration_status(ALL_DOMAINS, self.slug), MigrationStatus.IN_PROGRESS) + actual_migration() + raise ValueError('this migration failed') + + self.assertEqual(get_migration_status(ALL_DOMAINS, self.slug), MigrationStatus.NOT_STARTED) + self.assertEqual(actual_migration.call_count, 0) + + with self.assertRaises(ValueError): + failing_migration() + self.assertEqual(get_migration_status(ALL_DOMAINS, self.slug), MigrationStatus.NOT_STARTED) + self.assertEqual(actual_migration.call_count, 1) + + with self.assertRaises(ValueError): + failing_migration() + self.assertEqual(actual_migration.call_count, 2) diff --git a/corehq/apps/users/management/commands/rm_couch_user_data.py b/corehq/apps/users/management/commands/rm_couch_user_data.py index 78d92e881ac4..cf9498828e68 100644 --- a/corehq/apps/users/management/commands/rm_couch_user_data.py +++ b/corehq/apps/users/management/commands/rm_couch_user_data.py @@ -1,9 +1,9 @@ # One-off migration, June 2024 - from itertools import chain from django.core.management.base import BaseCommand +from corehq.apps.domain_migration_flags.api import once_off_migration from corehq.apps.users.models import CommCareUser from corehq.dbaccessors.couchapps.all_docs import ( get_doc_count_by_type, @@ -16,13 +16,18 @@ class Command(BaseCommand): help = "Populate SQL user data from couch" - def handle(self, **options): - db = CommCareUser.get_db() - count = (get_doc_count_by_type(db, 'WebUser') - + get_doc_count_by_type(db, 'CommCareUser')) - all_ids = chain(iter_all_doc_ids(db, 'WebUser'), - iter_all_doc_ids(db, 'CommCareUser')) - iter_update(db, _update_user, with_progress_bar(all_ids, count), verbose=True) + def handle(self, *args, **options): + do_migration() + + +@once_off_migration("rm_couch_user_data") +def do_migration(): + db = CommCareUser.get_db() + count = (get_doc_count_by_type(db, 'WebUser') + + get_doc_count_by_type(db, 'CommCareUser')) + all_ids = chain(iter_all_doc_ids(db, 'WebUser'), + iter_all_doc_ids(db, 'CommCareUser')) + iter_update(db, _update_user, with_progress_bar(all_ids, count), verbose=True) def _update_user(user_doc): From da40b4d6576e00fb2d8e2ca6a186a43a2b3a4b48 Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Thu, 27 Jun 2024 14:56:25 -0400 Subject: [PATCH 11/11] update help text --- corehq/apps/users/management/commands/rm_couch_user_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/apps/users/management/commands/rm_couch_user_data.py b/corehq/apps/users/management/commands/rm_couch_user_data.py index cf9498828e68..1001156dd95d 100644 --- a/corehq/apps/users/management/commands/rm_couch_user_data.py +++ b/corehq/apps/users/management/commands/rm_couch_user_data.py @@ -14,7 +14,7 @@ class Command(BaseCommand): - help = "Populate SQL user data from couch" + help = "Remove now-stale CouchUser.user_data field" def handle(self, *args, **options): do_migration()