-
-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove user data from couch user model #34805
Changes from all commits
07d5a70
160d540
45137f3
8e813e2
983b087
d04391c
5081177
ca7845f
ab0ed8a
a181308
da40b4d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import logging | ||
from collections import defaultdict | ||
from datetime import datetime | ||
from functools import wraps | ||
|
||
from django.db.models import Q | ||
|
||
|
@@ -126,3 +127,25 @@ def all_domains_with_migrations_in_progress(): | |
def reset_caches(domain, slug): | ||
any_migrations_in_progress(domain, strict=True) | ||
get_migration_status(domain, slug, strict=True) | ||
|
||
|
||
def once_off_migration(slug): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this. |
||
"""Ensure that the body of a migration is run only once | ||
|
||
You can still run it again if there is an exception | ||
""" | ||
def outer(migration_fn): | ||
@wraps(migration_fn) | ||
def inner(*args, **kwargs): | ||
if get_migration_complete(ALL_DOMAINS, slug): | ||
return | ||
set_migration_started(ALL_DOMAINS, slug) | ||
try: | ||
res = migration_fn(*args, **kwargs) | ||
except Exception: | ||
set_migration_not_started(ALL_DOMAINS, slug) | ||
raise | ||
set_migration_complete(ALL_DOMAINS, slug) | ||
return res | ||
return inner | ||
return outer |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,6 @@ | |
import jsonschema | ||
from memoized import memoized | ||
|
||
from dimagi.utils.couch.bulk import get_docs | ||
from dimagi.utils.couch.database import iter_docs | ||
from dimagi.utils.web import json_request, json_response | ||
|
||
|
@@ -367,32 +366,28 @@ def get_paginated_cases_without_gps(self, domain, page, limit): | |
|
||
def _get_paginated_users_without_gps(domain, page, limit, query): | ||
location_prop_name = get_geo_user_property(domain) | ||
query = ( | ||
res = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @zandre-eng What do you think of this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apologies for the late response on this as I was away on PTO. The changes look good, and it's nice that we're able to handle pagination now directly in the ES query. |
||
UserES() | ||
.domain(domain) | ||
.mobile_users() | ||
.missing_or_empty_user_data_property(location_prop_name) | ||
.search_string_query(query, ['username']) | ||
.fields(['_id', 'username']) | ||
.sort('created_on', desc=True) | ||
.start((page - 1) * limit) | ||
.size(limit) | ||
.run() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Man was this pulling all the users every function call when you could just pass the page directly to ES? 💀 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep. And then fetching the users again from couch. It does the same thing in the function below this one too - I just didn't want to get carried away with cleanup. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice, I see tests cover this |
||
) | ||
|
||
paginator = Paginator(query.get_ids(), limit) | ||
user_ids_page = list(paginator.get_page(page)) | ||
user_docs = get_docs(CommCareUser.get_db(), keys=user_ids_page) | ||
user_data = [] | ||
for user_doc in user_docs: | ||
lat, lon = get_lat_lon_from_dict(user_doc['user_data'], location_prop_name) | ||
user_data.append( | ||
{ | ||
'id': user_doc['_id'], | ||
'name': user_doc['username'].split('@')[0], | ||
'lat': lat, | ||
'lon': lon, | ||
} | ||
) | ||
return { | ||
'items': user_data, | ||
'total': paginator.count, | ||
'items': [ | ||
{ | ||
'id': hit['_id'], | ||
'name': hit['username'].split('@')[0], | ||
'lat': '', | ||
'lon': '', | ||
} for hit in res.hits | ||
], | ||
'total': res.total, | ||
} | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,17 +41,15 @@ def __init__(self, upload_domain, user_domain, user, is_new_user, changed_by_use | |
|
||
if not is_new_user: | ||
self.original_user_doc = self.user.to_json() | ||
self.original_user_data = self.user.get_user_data(user_domain).raw | ||
else: | ||
self.original_user_doc = None | ||
self.original_user_data = None | ||
|
||
self.fields_changed = {} | ||
self.change_messages = {} | ||
|
||
self._save = False # flag to check if log needs to be saved for updates | ||
|
||
def add_changes(self, changes): | ||
def add_changes(self, changes, skip_confirmation=False): | ||
""" | ||
Add changes to user properties. | ||
Ignored for new user since the whole user doc is logged for a new user | ||
|
@@ -60,7 +58,7 @@ def add_changes(self, changes): | |
if self.is_new_user: | ||
return | ||
for name, new_value in changes.items(): | ||
if self.original_user_doc[name] != new_value: | ||
if skip_confirmation or self.original_user_doc[name] != new_value: | ||
self.fields_changed[name] = new_value | ||
self._save = True | ||
|
||
|
@@ -157,6 +155,8 @@ def update_user_data(self, data, uncategorized_data, profile_name, profiles_by_n | |
from corehq.apps.users.user_data import UserDataError | ||
user_data = self.user.get_user_data(self.user_domain) | ||
old_profile_id = user_data.profile_id | ||
old_user_data = user_data.raw | ||
|
||
if PROFILE_SLUG in data: | ||
raise UserUploadError(_("You cannot set {} directly").format(PROFILE_SLUG)) | ||
if profile_name: | ||
|
@@ -174,20 +174,16 @@ def update_user_data(self, data, uncategorized_data, profile_name, profiles_by_n | |
if user_data.profile_id and user_data.profile_id != old_profile_id: | ||
self.logger.add_info(UserChangeMessage.profile_info(user_data.profile_id, profile_name)) | ||
|
||
if old_user_data != user_data.raw: | ||
self.logger.add_changes({'user_data': user_data.raw}, skip_confirmation=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand we did that before too. But is logging the user data a good idea? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate? This code logs all changes made to a user for auditing purposes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it is just the name of the field being There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. true - though There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair |
||
|
||
def save_log(self): | ||
# Tracking for role is done post save to have role setup correctly on save | ||
if self.role_updated: | ||
new_role = self.user.get_role(domain=self.user_domain) | ||
self.logger.add_info(UserChangeMessage.role_change(new_role)) | ||
|
||
self._include_user_data_changes() | ||
return self.logger.save() | ||
|
||
def _include_user_data_changes(self): | ||
new_user_data = self.user.get_user_data(self.user_domain).raw | ||
if self.logger.original_user_data != new_user_data: | ||
self.logger.add_changes({'user_data': new_user_data}) | ||
|
||
|
||
class CommCareUserImporter(BaseUserImporter): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -600,19 +600,21 @@ def __call__(self, item, evaluation_context=None): | |
@staticmethod | ||
@ucr_context_cache(vary_on=('related_doc_type', 'doc_id',)) | ||
def _get_document(related_doc_type, doc_id, evaluation_context): | ||
domain = evaluation_context.root_doc['domain'] | ||
assert domain | ||
document_store = get_document_store_for_doc_type( | ||
evaluation_context.root_doc['domain'], related_doc_type, | ||
load_source="related_doc_expression") | ||
domain, related_doc_type, load_source="related_doc_expression") | ||
try: | ||
doc = document_store.get_document(doc_id) | ||
except DocumentNotFoundError: | ||
return None | ||
if evaluation_context.root_doc['domain'] != doc.get('domain'): | ||
if domain != doc.get('domain'): | ||
return None | ||
if related_doc_type == 'CommCareUser': | ||
doc['user_data'] = CommCareUser.wrap(doc).get_user_data(domain).to_dict() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess user data just hasn't been part of UCRs since the migration? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do these expressions not apply to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's right - this has been broken since the migration. You can't do a related doc lookup on a web user, since they don't have a top level There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For what it's worth, this means of directly accessing the |
||
return doc | ||
|
||
def get_value(self, doc_id, evaluation_context): | ||
assert evaluation_context.root_doc['domain'] | ||
doc = self._get_document(self.related_doc_type, doc_id, evaluation_context) | ||
# explicitly use a new evaluation context since this is a new document | ||
return self._value_expression(doc, EvaluationContext(doc, 0)) | ||
|
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# One-off migration, June 2024 | ||
from itertools import chain | ||
|
||
from django.core.management.base import BaseCommand | ||
|
||
from corehq.apps.domain_migration_flags.api import once_off_migration | ||
from corehq.apps.users.models import CommCareUser | ||
from corehq.dbaccessors.couchapps.all_docs import ( | ||
get_doc_count_by_type, | ||
iter_all_doc_ids, | ||
) | ||
from corehq.util.couch import DocUpdate, iter_update | ||
from corehq.util.log import with_progress_bar | ||
|
||
|
||
class Command(BaseCommand): | ||
help = "Remove now-stale CouchUser.user_data field" | ||
|
||
def handle(self, *args, **options): | ||
do_migration() | ||
|
||
|
||
@once_off_migration("rm_couch_user_data") | ||
def do_migration(): | ||
db = CommCareUser.get_db() | ||
count = (get_doc_count_by_type(db, 'WebUser') | ||
+ get_doc_count_by_type(db, 'CommCareUser')) | ||
all_ids = chain(iter_all_doc_ids(db, 'WebUser'), | ||
iter_all_doc_ids(db, 'CommCareUser')) | ||
iter_update(db, _update_user, with_progress_bar(all_ids, count), verbose=True) | ||
|
||
|
||
def _update_user(user_doc): | ||
couch_data = user_doc.pop('user_data', ...) | ||
if couch_data is not ...: | ||
return DocUpdate(user_doc) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't the API still be trying to update the
user_data
attribute on theCommCareUser
model? Or is that somehow otherwise accounted for?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, good question. That's handled here:
commcare-hq/corehq/apps/api/user_updates.py
Lines 112 to 122 in d3d0b80
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see,
obj_update
should manually override the update for this field.