From 25113f6b96999b0c6eb19f2d19c1b38aba4abd2d Mon Sep 17 00:00:00 2001 From: JackScanlon Date: Wed, 3 Jan 2024 15:23:48 +0000 Subject: [PATCH 01/45] Init implementation of testing suite for DAG & Trees; and impl. fixes for unescaped titles, coding system computation etc --- CodeListLibrary_project/clinicalcode/admin.py | 5 + .../clinicalcode/entity_utils/create_utils.py | 24 +- .../clinicalcode/generators/__init__.py | 0 .../generators/graphs/__init__.py | 0 .../generators/graphs/constants.py | 5 + .../generators/graphs/generator.py | 85 +++++ .../clinicalcode/generators/graphs/utils.py | 42 +++ .../management/commands/dag_tasks.py | 314 ++++++++++++++++++ ...11_clinicaldiseasecategoryedge_and_more.py | 48 +++ .../models/ClinicalDiseaseCategory.py | 55 +++ CodeListLibrary_project/clinicalcode/urls.py | 1 + .../clinicalcode/views/adminTemp.py | 81 +++++ .../templates/components/create/aside.html | 6 +- .../create/section/section_start.html | 2 +- .../templates/components/details/aside.html | 6 +- .../details/section/section_start.html | 2 +- CodeListLibrary_project/data/.gitignore | 3 + CodeListLibrary_project/data/graphs/.gitkeep | 0 .../dynamic_templates/atlas_phecode.json | 141 ++++++++ docker/requirements/base.txt | 1 + docker/requirements/local.txt | 3 +- docs/sql-scripts/examine_codingsystems.sql | 107 ++++++ 22 files changed, 925 insertions(+), 6 deletions(-) create mode 100644 CodeListLibrary_project/clinicalcode/generators/__init__.py create mode 100644 CodeListLibrary_project/clinicalcode/generators/graphs/__init__.py create mode 100644 CodeListLibrary_project/clinicalcode/generators/graphs/constants.py create mode 100644 CodeListLibrary_project/clinicalcode/generators/graphs/generator.py create mode 100644 CodeListLibrary_project/clinicalcode/generators/graphs/utils.py create mode 100644 CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py create mode 100644 
CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py create mode 100644 CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py create mode 100644 CodeListLibrary_project/data/.gitignore create mode 100644 CodeListLibrary_project/data/graphs/.gitkeep create mode 100644 CodeListLibrary_project/dynamic_templates/atlas_phecode.json create mode 100644 docs/sql-scripts/examine_codingsystems.sql diff --git a/CodeListLibrary_project/clinicalcode/admin.py b/CodeListLibrary_project/clinicalcode/admin.py index 592cc3715..55b32b666 100644 --- a/CodeListLibrary_project/clinicalcode/admin.py +++ b/CodeListLibrary_project/clinicalcode/admin.py @@ -7,9 +7,14 @@ from .models.EntityClass import EntityClass from .models.GenericEntity import GenericEntity from .models.Template import Template +from .models.ClinicalDiseaseCategory import ClinicalDiseaseCategoryNode from .forms.TemplateForm import TemplateAdminForm from .forms.EntityClassForm import EntityAdminForm +@admin.register(ClinicalDiseaseCategoryNode) +class ClinicalDiseaseCategoryNode(admin.ModelAdmin): + list_display = ['id', 'name', 'code', 'coding_system', 'code_id'] + @admin.register(CodingSystemFilter) class CodingSystemFilterAdmin(admin.ModelAdmin): diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py b/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py index bcfbef954..657662042 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py @@ -171,7 +171,7 @@ def try_validate_sourced_value(field, template, data, default=None, request=None data = str(data) if data in options: return data - + return default def validate_form_method(form_method, errors=[], default=None): @@ -571,6 +571,24 @@ def validate_metadata_value(request, field, value, errors=[]): field_value = gen_utils.try_value_as_type(value, field_type, validation) return 
field_value, True +def is_computed_template_field(field, form_template): + """ + Checks whether a field is considered a computed field within its template + """ + field_data = template_utils.get_layout_field(form_template, field) + if field_data is None: + return False + + validation = template_utils.try_get_content(field_data, 'validation') + if validation is None: + return False + + field_computed = template_utils.try_get_content(validation, 'computed') + if field_computed is not None: + return True + + return False + def validate_template_value(request, field, form_template, value, errors=[]): """ Validates the form's field value against the entity template @@ -659,9 +677,13 @@ def validate_entity_form(request, content, errors=[], method=None): continue top_level_data[field] = field_value elif validate_template_field(form_template, field): + if is_computed_template_field(field, form_template): + continue + field_value, validated = validate_template_value(request, field, form_template, value, errors) if not validated or field_value is None: continue + template_data[field] = field_value try_add_computed_fields(field, form_data, form_template, template_data) diff --git a/CodeListLibrary_project/clinicalcode/generators/__init__.py b/CodeListLibrary_project/clinicalcode/generators/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/CodeListLibrary_project/clinicalcode/generators/graphs/__init__.py b/CodeListLibrary_project/clinicalcode/generators/graphs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/CodeListLibrary_project/clinicalcode/generators/graphs/constants.py b/CodeListLibrary_project/clinicalcode/generators/graphs/constants.py new file mode 100644 index 000000000..b1a0fc359 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/generators/graphs/constants.py @@ -0,0 +1,5 @@ +import enum + +class GraphTypes(str, enum.Enum): + Tree = 0 + DirectedAcyclicGraph = 1 diff --git 
a/CodeListLibrary_project/clinicalcode/generators/graphs/generator.py b/CodeListLibrary_project/clinicalcode/generators/graphs/generator.py new file mode 100644 index 000000000..a6aab0036 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/generators/graphs/generator.py @@ -0,0 +1,85 @@ +from faker import Faker + +import json + +from . import utils +from . import constants + +class Graph: + """ + [!] Note: see utils.py for kwargs to change size / connectivity + + e.g. + + ```py + # Generate a graph + graph = Graph.generate(graph_type=Graph.Types.DirectedAcyclicGraph) # or Graph.generate(graph_type=Graph.Types.Tree) + + # `graph.dots` - can be used to generate the edge list in DOT format, ref @ https://en.wikipedia.org/wiki/DOT_(graph_description_language + print(graph.dots) + + # `graph.nodes` - provides the fake data associated with each node and its edge list + print(graph.nodes) + + # `graph.dump` - dump to file if needed + graph.dump(output_file='./test.json') + ``` + + """ + + __key = object() + + @classmethod + def generate(cls, graph_type, **kwargs): + return Graph(cls.__key, graph_type, **kwargs) + + @classmethod + @property + def Types(cls): + return constants.GraphTypes + + def __init__(self, key, graph_type, **kwargs): + if key != Graph._Graph__key: + raise AssertionError('Constructor is private, please use the `generate` method') + + if graph_type == constants.GraphTypes.DirectedAcyclicGraph: + self.network = utils.generate_dag(**kwargs) + elif graph_type == constants.GraphTypes.Tree: + self.network = utils.generate_tree(**kwargs) + else: + raise NotImplementedError('Graph type is not implemented') + self.type = graph_type + + @property + def dots(self): + dots = '' + for edge in self.network: + dots += '\t%(index)s -> %(vertex)s;\n' % { 'index': edge[0], 'vertex': edge[1] } + + return 'digraph {\n%s}' % dots + + @property + def nodes(self): + fake = Faker() + nodes = [ ] + for edge in self.network: + node = next((x for x in nodes if x['id'] == 
edge[0]), None) + if not node: + node = { 'id': edge[0], 'name': fake.name(), 'edges': [ ] } + nodes.append(node) + + connection = next((x for x in nodes if x['id'] == edge[1]), None) + if not connection: + nodes.append({ 'id': edge[1], 'name': fake.name(), 'edges': [ ] }) + + node['edges'].append(edge[1]) + + return nodes + + def dump(self, output_file=None, indent=2): + nodes = self.nodes + if isinstance(output_file, str): + with open(output_file, 'w') as f: + json.dump(nodes, f, indent=indent) + + return json.dumps(nodes, indent=indent) diff --git a/CodeListLibrary_project/clinicalcode/generators/graphs/utils.py b/CodeListLibrary_project/clinicalcode/generators/graphs/utils.py new file mode 100644 index 000000000..10ff75f26 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/generators/graphs/utils.py @@ -0,0 +1,42 @@ +import random + +def generate_dag(connectivity=0.5, min_rank_width=2, max_rank_width=4, min_rank_height=3, max_rank_height=5): + ranks = random.randint(min_rank_height, max_rank_height) + nodes = 0 + node_counter = 0 + network = [] + rank_list = [] + + for i in range(ranks): + new_nodes = random.randint(min_rank_width, max_rank_width) + + ranks = [] + for j in range(new_nodes): + ranks.append(node_counter) + node_counter += 1 + rank_list.append(ranks) + + if i > 0: + for j in rank_list[i - 1]: + for k in range(new_nodes): + if random.random() < connectivity: + network.append((j, k+nodes)) + + nodes += new_nodes + + return network + +def generate_tree(size=10): + sequence = [random.choice(range(size)) for i in range(size - 2)] + height = len(sequence) + L = set(range(height+2)) + network = [] + + for i in range(height): + u, v = sequence[0], min(L - set(sequence)) + sequence.pop(0) + L.remove(v) + network.append((u,v)) + network.append((L.pop(), L.pop())) + + return network diff --git a/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py b/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py new file mode 
100644 index 000000000..6e219b9da --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py @@ -0,0 +1,314 @@ +from django.core.management.base import BaseCommand +from django.db import transaction + +import os +import json +import enum + +from ...generators.graphs.generator import Graph as GraphGenerator +from ...models.CodingSystem import CodingSystem +from ...models.ClinicalDiseaseCategory import ClinicalDiseaseCategoryEdge, ClinicalDiseaseCategoryNode + + +###################################################### +# # +# Constants # +# # +###################################################### +class IterableMeta(enum.EnumMeta): + """ + Metaclass that defines additional methods + of operation and interaction with enums + + """ + def from_name(cls, name): + if name in cls: + return getattr(cls, name) + + def __contains__(cls, lhs): + try: + cls(lhs) + except ValueError: + return lhs in cls.__members__.keys() + else: + return True + +class GraphType(int, enum.Enum, metaclass=IterableMeta): + """ + Parsed from input file to determine how to handle the data + + e.g. 
{ type: 'CODE_CATEGORIES' } within `./data/graphs/icd10_categories.json` + + """ + CODE_CATEGORIES = 0 + +class LogType(int, enum.Enum, metaclass=IterableMeta): + """ + Enum that reflects the output style, as described by the BaseCommand log style + + See ref @ https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/#django.core.management.BaseCommand.style + + """ + SUCCESS = 1 + NOTICE = 2 + WARNING = 3 + ERROR = 4 + + +###################################################### +# # +# Graph Builders # +# # +###################################################### +class GraphBuilders: + """ + Builds a graph according to the given GraphType + and its associated data + + """ + + @classmethod + def try_build(cls, builder_type, data): + """ + Attempts to build a graph given a valid builder type + + """ + if not isinstance(builder_type, GraphType): + return False, 'Expected valid GraphType, got %s' % str(builder_type) + + desired_builder = getattr(cls, builder_type.name) + if desired_builder is None: + return False, 'Invalid Builder, no class method available with the name: %s' % builder_type.name + + bound_to = getattr(desired_builder, '__self__', None) + if not isinstance(bound_to, type) or bound_to is not cls: + return False, 'Invalid Builder, no appropriate class method found for BuilderType<%s>' % builder_type.name + + return desired_builder(data) + + @classmethod + def CODE_CATEGORIES(cls, data): + """ + ICD-10 Disease Category builder + + """ + if not isinstance(data, list): + return False, 'Invalid data type, expected list but got %s' % type(data) + + + '''! 
TODO !''' + # Need to process code categories data + + + return False, data + + +###################################################### +# # +# DAG Command # +# # +###################################################### +class Command(BaseCommand): + help = 'Various tasks associated with the generation of DAGs' + + DEFAULT_FILE = 'data/graphs/categories.json' + VALID_FILE_TYPES = ['.json'] + LOG_FILE_NAME = 'DAG_LOGS' + LOG_FILE_EXT = '.txt' + + def __get_log_style(self, style): + """ + Returns the BaseCommand's log style + + See ref @ https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/#django.core.management.BaseCommand.style + + """ + if isinstance(style, str): + style = style.upper() + if style in LogType.__members__: + return getattr(self.style, style) + elif isinstance(style, LogType): + if style.name in LogType.__members__: + return getattr(self.style, style.name) + return self.style.SUCCESS + + def __log_dots(self, nodes, name=None): + """ + Logs the edge list in DOT format + + See ref @ https://en.wikipedia.org/wiki/DOT_(graph_description_language + + """ + name = name or 'Unknown' + dots = '' + for i, node in enumerate(nodes): + for child in node.children.all(): + dots += '\t%(index)s -> %(vertex)s;\n' % { 'index': node.id, 'vertex': child.id } + + self.__log_to_file('Digraph<%s>: digraph {\n%s}' % (name, dots)) + + def __log_to_file(self, message, style=LogType.SUCCESS): + """ + Logs the message, prepended with its style, to the log file (if a valid directory has been provided) + + """ + directory = self._log_dir + if not isinstance(directory, str): + return + + if not os.path.isabs(directory): + directory = os.path.join( + os.path.abspath(os.path.dirname('manage.py')), + directory + ) + + if not os.path.exists(directory): + os.makedirs(directory) + + style = style.name if isinstance(style, LogType) else style + filename = os.path.join(directory, f'{self.LOG_FILE_NAME}{self.LOG_FILE_EXT}') + offset = '\n' if os.path.exists(filename) 
else '' + with open(filename, 'a') as file: + file.writelines([f'{offset}[{style}] {message}\n']) + + def __log(self, message, style=LogType.SUCCESS): + """ + Logs the incoming to: + 1. The log file, if a valid directory has been provided + 2. The terminal, if the `-p` argument has been provided + + """ + self.__log_to_file(message, style) + + if not self._verbose: + return + style = self.__get_log_style(style) + self.stdout.write(style(message)) + + def __try_load_file(self, filepath): + """ + Attempts to load the given filepath as a JSON object + + """ + filepath = os.path.join( + os.path.abspath(os.path.dirname('manage.py')), + filepath if filepath is not None else self.DEFAULT_FILE + ) + + self.__log(f'Initialising DAG command with Path<{filepath}> ...') + + if not os.path.exists(filepath): + self.__log(f'Path<{filepath}> does not exist', LogType.ERROR) + return + + if not os.path.isfile(filepath): + self.__log(f'Path<{filepath}> does not reference a file', LogType.ERROR) + return + + file_extension = os.path.splitext(filepath)[1] + if file_extension not in self.VALID_FILE_TYPES: + self.__log(f'File<{filepath}> does not reference a valid file of expected types: {", ".join(self.VALID_FILE_TYPES)}', LogType.ERROR) + return + + try: + with open(filepath) as f: + data = json.load(f) + return data + except Exception as e: + self.__log(f'Error when attempting to load File<{filepath}>:\n{str(e)}', LogType.ERROR) + return None + + def __try_build_dag(self, filepath): + """ + Attempts to build the DAG from the given filepath + + """ + # attempt import + data = self.__try_load_file(filepath) + if data is None: + return + + # validate + graph_input = data.get('data', None) + if graph_input is None: + self.__log(f'No property `data` found within File<{filepath}>', LogType.ERROR) + return + + builder_type = data.get('type', None) + builder_type = GraphType[builder_type] if builder_type is not None and builder_type in GraphType else None + + if not isinstance(builder_type, 
GraphType): + self.__log(f'No valid property `type` found within File<{filepath}>', LogType.ERROR) + return + + # attempt generation + success, result = GraphBuilders.try_build(builder_type, graph_input) + if not success: + result = result if isinstance(result, str) else 'Unknown error occurred' + self.__log(f'Error occurred when processing File<{filepath}> via BuilderType<{builder_type.name}>:\n\t{result}', LogType.ERROR) + return + + def __generate_debug_dag(self): + """ + Responsible for generating a debug dag using the graph generators & its utility methods + + """ + graph = GraphGenerator.generate(graph_type=GraphGenerator.Types.DirectedAcyclicGraph) + nodes = [ClinicalDiseaseCategoryNode(name=node.get('name'), code=str(node.get('id'))) for node in graph.nodes] + nodes = ClinicalDiseaseCategoryNode.objects.bulk_create(nodes) + + output = '' + for i, data in enumerate(graph.nodes): + index = str(data.get('id')) + edges = data.get('edges') + + node = next((x for x in nodes if x.code == index), None) + if not node: + continue + + output = f'{output}\n\tNode [' + if len(edges) > 0: + for j, element in enumerate(edges): + connection = next((x for x in nodes if x.code == str(element)), None) + if not connection: + continue + output = f'{output}\n\t\tConnection' + node.add_child(connection) + output = output + '\n\t]' + else: + output = output + ' ]' + self.__log('Graph Generation {%s\n}' % output) + + if self._log_dir: + self.__log_dots(nodes=nodes, name='DebugGraph') + + def add_arguments(self, parser): + """ + Handles arguments given via the CLI + + """ + parser.add_argument('-p', '--print', type=bool, help='Print debug information to the terminal') + parser.add_argument('-f', '--file', type=str, help='Location of DAG data relative to manage.py') + parser.add_argument('-d', '--debug', type=bool, help='If true, attempts to generate DAG and ignores the --file parameter') + parser.add_argument('-l', '--log', type=str, help=f'Expects directory, will output logs incl. 
DOTS representation to file as {self.LOG_FILE_NAME}{self.LOG_FILE_EXT}') + + @transaction.atomic + def handle(self, *args, **kwargs): + """ + Main command handle + + """ + # init parameters + verbose = kwargs.get('print') + filepath = kwargs.get('file') + is_debug = kwargs.get('debug') + log_file = kwargs.get('log') + + # det. handle + self._verbose = verbose + self._log_dir = log_file + + if is_debug: + self.__generate_debug_dag() + else: + self.__try_build_dag(filepath or self.DEFAULT_FILE) diff --git a/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py new file mode 100644 index 000000000..1b7d4a285 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py @@ -0,0 +1,48 @@ +# Generated by Django 4.1.10 on 2024-01-03 13:01 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('clinicalcode', '0110_read_cd_cv2_scd_readv2_defn_ln_gin_idx_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='ClinicalDiseaseCategoryEdge', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ClinicalDiseaseCategoryNode', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255)), + ('code', models.CharField(max_length=255)), + ('code_id', models.IntegerField(null=True)), + ('children', models.ManyToManyField(blank=True, related_name='parents', through='clinicalcode.ClinicalDiseaseCategoryEdge', to='clinicalcode.clinicaldiseasecategorynode')), + ('coding_system', models.ForeignKey(blank=True, 
null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='disease_categories', to='clinicalcode.codingsystem')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='clinicaldiseasecategoryedge', + name='child', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='parent_edges', to='clinicalcode.clinicaldiseasecategorynode'), + ), + migrations.AddField( + model_name='clinicaldiseasecategoryedge', + name='parent', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='children_edges', to='clinicalcode.clinicaldiseasecategorynode'), + ), + ] diff --git a/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py b/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py new file mode 100644 index 000000000..ef11bf5d2 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py @@ -0,0 +1,55 @@ +from django.db import models +from django.apps import apps +from django_postgresql_dag.models import node_factory, edge_factory + +from .CodingSystem import CodingSystem + +class ClinicalDiseaseCategoryEdge(edge_factory('ClinicalDiseaseCategoryNode', concrete=False)): + name = models.CharField(max_length=255, unique=True) + + def __str__(self): + return self.name + + def save(self, *args, **kwargs): + self.name = f"{self.parent.name} {self.child.name}" + super().save(*args, **kwargs) + +class ClinicalDiseaseCategoryNode(node_factory(ClinicalDiseaseCategoryEdge)): + name = models.CharField(max_length=255) + code = models.CharField(max_length=255) + coding_system = models.ForeignKey(CodingSystem, on_delete=models.SET_NULL, related_name='disease_categories', null=True, blank=True) + code_id = models.IntegerField(null=True) + + def __str__(self): + return self.name + + def save(self, *args, **kwargs): + desired_code = self.code + desired_system = self.coding_system + + if desired_code is not None and desired_system is 
not None: + try: + comparators = [ desired_code, desired_code.replace('.', '') ] + + coding_system = CodingSystem.objects.get(codingsystem_id=desired_system) + table_name = coding_system.table_name + model_name = coding_system.table_name.replace('clinicalcode_', '') + codes_name = coding_system.coding_system.code_column_name.lower() + + query = """ + select * + from public.%(table_name)s + where lower(%(column_name)s) + """ % { 'table_name': table_name, 'column_name': codes_name } + + codes = apps.get_model(app_label='clinicalcode', model_name=model_name) + code = codes.objects.raw(query + ' = ANY(%(values)s::text[])', { 'values': comparators }) + + code = code.first() if code.exists() else None + except: + self.code_id = None + else: + if code is not None: + self.code_id = code.pk + + super().save(*args, **kwargs) diff --git a/CodeListLibrary_project/clinicalcode/urls.py b/CodeListLibrary_project/clinicalcode/urls.py index ec93cf6b9..a38b5cc60 100644 --- a/CodeListLibrary_project/clinicalcode/urls.py +++ b/CodeListLibrary_project/clinicalcode/urls.py @@ -119,5 +119,6 @@ # url(r'^adminTemp/admin_fix_breathe_dt/$', adminTemp.admin_fix_breathe_dt, name='admin_fix_breathe_dt'), #url(r'^adminTemp/admin_fix_malformed_codes/$', adminTemp.admin_fix_malformed_codes, name='admin_fix_malformed_codes'), url(r'^adminTemp/admin_force_adp_links/$', adminTemp.admin_force_adp_linkage, name='admin_force_adp_links'), + url(r'^adminTemp/admin_fix_coding_system_linkage/$', adminTemp.admin_fix_coding_system_linkage, name='admin_fix_coding_system_linkage'), url(r'^adminTemp/admin_force_brand_links/$', adminTemp.admin_force_brand_links, name='admin_force_brand_links'), ] diff --git a/CodeListLibrary_project/clinicalcode/views/adminTemp.py b/CodeListLibrary_project/clinicalcode/views/adminTemp.py index f3335a937..b1e5fd322 100644 --- a/CodeListLibrary_project/clinicalcode/views/adminTemp.py +++ b/CodeListLibrary_project/clinicalcode/views/adminTemp.py @@ -227,6 +227,87 @@ def 
admin_fix_malformed_codes(request): } ) +@login_required +def admin_fix_coding_system_linkage(request): + if settings.CLL_READ_ONLY: + raise PermissionDenied + + if not request.user.is_superuser: + raise PermissionDenied + + if not permission_utils.is_member(request.user, 'system developers'): + raise PermissionDenied + + # get + if request.method == 'GET': + return render( + request, + 'clinicalcode/adminTemp/admin_temp_tool.html', + { + 'url': reverse('admin_fix_coding_system_linkage'), + 'action_title': 'Fix Coding System Linkage', + 'hide_phenotype_options': True, + } + ) + + # post + if request.method != 'POST': + raise BadRequest('Invalid') + + row_count = 0 + with connection.cursor() as cursor: + sql = ''' + + update public.clinicalcode_historicalgenericentity as trg + set template_data['coding_system'] = to_jsonb(src.coding_system) + from ( + select entity.phenotype_id, + entity.phenotype_version_id, + array_agg(distinct concept.coding_system_id::integer) as coding_system + from public.clinicalcode_historicalconcept as concept + join ( + select id as phenotype_id, + history_id as phenotype_version_id, + cast(concepts->>'concept_id' as integer) as concept_id, + cast(concepts->>'concept_version_id' as integer) as concept_version_id + from ( + select id, + history_id, + concepts + from public.clinicalcode_historicalgenericentity as entity, + json_array_elements(entity.template_data::json->'concept_information') as concepts + where template_id = 1 + and json_array_length(entity.template_data::json->'concept_information') > 0 + ) results + ) as entity + on entity.concept_id = concept.id + and entity.concept_version_id = concept.history_id + group by entity.phenotype_id, + entity.phenotype_version_id + ) src + where trg.id = src.phenotype_id + and trg.history_id = src.phenotype_version_id + and trg.template_id = 1 + and array( + select jsonb_array_elements_text(trg.template_data->'coding_system') + )::int[] <> src.coding_system; + + ''' + + cursor.execute(sql) + 
row_count = cursor.rowcount + + return render( + request, + 'clinicalcode/adminTemp/admin_temp_tool.html', + { + 'pk': -10, + 'rowsAffected' : { '1': f'Updated {str(row_count)} entities' }, + 'action_title': 'Fix Coding System Linkage', + 'hide_phenotype_options': True, + } + ) + @login_required def admin_force_adp_linkage(request): if settings.CLL_READ_ONLY: diff --git a/CodeListLibrary_project/cll/templates/components/create/aside.html b/CodeListLibrary_project/cll/templates/components/create/aside.html index c9fb0ac55..9b6437d82 100644 --- a/CodeListLibrary_project/cll/templates/components/create/aside.html +++ b/CodeListLibrary_project/cll/templates/components/create/aside.html @@ -3,7 +3,11 @@
Skip to step... {% for section in create_sections %} - {{ section.title }} + + {{ section.title }} + {% endfor %}
diff --git a/CodeListLibrary_project/cll/templates/components/create/section/section_start.html b/CodeListLibrary_project/cll/templates/components/create/section/section_start.html index 7aca6b5f1..863443019 100644 --- a/CodeListLibrary_project/cll/templates/components/create/section/section_start.html +++ b/CodeListLibrary_project/cll/templates/components/create/section/section_start.html @@ -1,5 +1,5 @@ {% load entity_renderer %} -
  • +
  • {{ section.title }}

    diff --git a/CodeListLibrary_project/cll/templates/components/details/aside.html b/CodeListLibrary_project/cll/templates/components/details/aside.html index 83dbf6381..ca28ed388 100644 --- a/CodeListLibrary_project/cll/templates/components/details/aside.html +++ b/CodeListLibrary_project/cll/templates/components/details/aside.html @@ -3,7 +3,11 @@ diff --git a/CodeListLibrary_project/cll/templates/components/details/section/section_start.html b/CodeListLibrary_project/cll/templates/components/details/section/section_start.html index 89cde89a5..07d028034 100644 --- a/CodeListLibrary_project/cll/templates/components/details/section/section_start.html +++ b/CodeListLibrary_project/cll/templates/components/details/section/section_start.html @@ -1,5 +1,5 @@ {% load entity_renderer %} -
  • +
  • {{ section.title }}

    {% if not section.hide_description %}

    {{ section.description }}

    diff --git a/CodeListLibrary_project/data/.gitignore b/CodeListLibrary_project/data/.gitignore new file mode 100644 index 000000000..8b609feff --- /dev/null +++ b/CodeListLibrary_project/data/.gitignore @@ -0,0 +1,3 @@ +./output +*.txt +*.json diff --git a/CodeListLibrary_project/data/graphs/.gitkeep b/CodeListLibrary_project/data/graphs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/CodeListLibrary_project/dynamic_templates/atlas_phecode.json b/CodeListLibrary_project/dynamic_templates/atlas_phecode.json new file mode 100644 index 000000000..ad772c481 --- /dev/null +++ b/CodeListLibrary_project/dynamic_templates/atlas_phecode.json @@ -0,0 +1,141 @@ +{ + "template_details": { + "version": 1, + "name": "Atlas Phecode Phenotype", + "description": "A manually curated group of clinical codes intended to capture clinically meaningful concepts for research", + "card_type": "clinical" + }, + + "sections": [ + { + "title": "Name & Author", + "description": "", + "fields": ["name", "author"], + "hide_on_detail": true + }, + { + "title": "Overview", + "description": "An overview of the phenotype with basic metadata.", + "fields": ["type", "coding_system", "data_sources", "collections", "tags", "source_reference"] + }, + { + "title": "Definition", + "description": "", + "fields": ["definition"] + }, + { + "title": "Implementation", + "description": "How this phenotype definition is run against data.", + "fields": ["implementation"], + "hide_if_empty": true + }, + { + "title": "Validation", + "description": "Description of how correctness, appropriateness, and/or quality was assessed.", + "fields": ["validation"], + "do_not_show_in_production": true, + "hide_if_empty": true + }, + { + "title": "Phecode", + "description": "The associated clinical code", + "fields": ["phecode"] + }, + { + "title": "Publication", + "description": "Publication(s) where this phenotype is defined and/or used.", + "fields": ["publications", "citation_requirements"] + } + ], 
+ + "fields": { + "type": { + "title": "Phenotype Type", + "description": "The category of patient characteristic this phenotype falls under.", + "field_type": "enum_dropdown_badge", + "active": true, + "validation": { + "type": "enum", + "mandatory": true, + "options": { + "1": "Biomarker", + "2": "Disease or syndrome", + "3": "Drug", + "4": "Lifestyle risk factor", + "5": "Musculoskeletal", + "6": "Surgical procedure", + "7": "Other" + } + }, + "search": { + "filterable": true, + "api": true + } + }, + "data_sources": { + "title": "Data Sources", + "description": "Data sources the phenotype creators have run this phenotype against; or view as appropriate to use this phenotype for.", + "field_type": "data_sources", + "active": true, + "validation": { + "type": "int_array", + "mandatory": false, + "source": { + "table": "DataSource", + "query": "id", + "relative": "name", + "include": ["uid", "url"] + } + }, + "search": { + "filterable": true, + "api": true + } + }, + "coding_system": { + "title": "Coding System", + "description":"Clinical coding system(s) contained within this phenotype. A phenotype may have multiple concepts, each with its own coding system. 
All contained coding systems are programmatically represented here.", + "field_type": "coding_system", + "active": true, + "validation": { + "type": "int_array", + "mandatory": false, + "computed": true, + "source": { + "table": "CodingSystem", + "query": "codingsystem_id", + "relative": "name" + } + }, + "search": { + "filterable": true, + "api": true + }, + "hide_on_create": true + }, + "phecode": { + "title": "Phecode", + "description": "The clinical code that captures this clinical concept", + "field_type": "phecode", + "active": true, + "validation": { + "type": "phecode", + "mandatory": false, + "has_children": true + } + }, + "source_reference": { + "title": "Atlas Source Reference", + "description": "The Atlas Phenotype associated with this entity (if any)", + "field_type": "string_inputbox", + "active": true, + "validation": { + "type": "string", + "mandatory": false, + "length": [0, 250], + "regex": "^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$" + }, + "hide_if_empty": true + } + } +} diff --git a/docker/requirements/base.txt b/docker/requirements/base.txt index 793fce599..ae05d46fd 100644 --- a/docker/requirements/base.txt +++ b/docker/requirements/base.txt @@ -18,6 +18,7 @@ django-rest-swagger==2.2.0 django-sass-processor==1.2.2 django-simple-history==3.1.1 django-timezone-field==5.0 +django-postgresql-dag==0.4.0 djangorestframework==3.14.0 djangorestframework-xml==2.0.0 drf-yasg==1.21.7 diff --git a/docker/requirements/local.txt b/docker/requirements/local.txt index 2bb1bd276..a973c57c4 100644 --- a/docker/requirements/local.txt +++ b/docker/requirements/local.txt @@ -1,2 +1,3 @@ -r base.txt # includes the base.txt requirements file -debugpy==1.6.2 \ No newline at end of file +debugpy==1.6.2 +faker==22.0.0 diff --git a/docs/sql-scripts/examine_codingsystems.sql b/docs/sql-scripts/examine_codingsystems.sql new file mode 100644 index 000000000..e0ba60abf --- /dev/null +++ 
b/docs/sql-scripts/examine_codingsystems.sql @@ -0,0 +1,107 @@ +/* + + Data structures.... + + 1. Clinical GenericEntity model template structure: + + HistoricalGenericEntity::model { + template_data::jsonb { + 'concept_information'::jsonb [ + { 'concept_id': number, 'concept_version_id': number, 'attributes': array[] }, + { 'concept_id': number, 'concept_version_id': number, 'attributes': array[] }, + ], + + 'coding_system'::jsonb [ + coding_system_id, + coding_system_id + ], + } + } + + 2. Need to: + - Find all coding systems associated with a phenotype and aggregate them + - Update each HistoricalGenericEntity model with the updated coding system field + + e.g. + + GenericEntity::model { + template_data::jsonb { + 'concept_information'::jsonb [ + { concept_id: 3405, concept_version_id: 10431 }, + { concept_id: 3407, concept_version_id: 10433 }, + { concept_id: 716, concept_version_id: 2571 }, + ], + + 'coding_system'::jsonb [ + 4, 5, 3 + ], + } + } + +*/ + +/********************************** + * * + * Det. array aggregates * + * * + **********************************/ + +-- select entity.phenotype_id, +-- entity.phenotype_version_id, +-- array_agg(distinct concept.coding_system_id::integer) as coding_system +-- from public.clinicalcode_historicalconcept as concept +-- join entities as entity +-- on entity.concept_id = concept.id and entity.concept_version_id = concept.history_id +-- group by entity.phenotype_id, entity.phenotype_version_id + + +/********************************** + * * + * Det. 
array comparisons * + * * + **********************************/ + +-- select id as phenotype_id, +-- history_id as phenotype_version_id, +-- array( +-- select jsonb_array_elements_text(entity.template_data->'coding_system') +-- )::int[] +-- from public.clinicalcode_historicalgenericentity as entity +-- where json_array_length(entity.template_data::json->'coding_system') > 0 + + +/********************************** + * * + * Update all entities * + * * + **********************************/ + +update public.clinicalcode_historicalgenericentity as trg + set template_data['coding_system'] = to_jsonb(src.coding_system) + from ( + select entity.phenotype_id, + entity.phenotype_version_id, + array_agg(distinct concept.coding_system_id::integer) as coding_system + from public.clinicalcode_historicalconcept as concept + join ( + select id as phenotype_id, + history_id as phenotype_version_id, + cast(concepts->>'concept_id' as integer) as concept_id, + cast(concepts->>'concept_version_id' as integer) as concept_version_id + from ( + select id, + history_id, + concepts + from public.clinicalcode_historicalgenericentity as entity, + json_array_elements(entity.template_data::json->'concept_information') as concepts + where json_array_length(entity.template_data::json->'concept_information') > 0 + ) results + ) as entity + on entity.concept_id = concept.id and entity.concept_version_id = concept.history_id + group by entity.phenotype_id, entity.phenotype_version_id + ) src + where trg.id = src.phenotype_id + and trg.history_id = src.phenotype_version_id + and array( + select jsonb_array_elements_text(trg.template_data->'coding_system') + )::int[] <> src.coding_system; From 1331989c83d5b286a4831337ff0373969f02b590 Mon Sep 17 00:00:00 2001 From: JackScanlon Date: Wed, 3 Jan 2024 18:41:52 +0000 Subject: [PATCH 02/45] Impl. 
ICD-10 disease categories for testing --- .../management/commands/dag_tasks.py | 112 +++++++++++++++++- ...11_clinicaldiseasecategoryedge_and_more.py | 10 +- .../models/ClinicalDiseaseCategory.py | 20 ++-- 3 files changed, 121 insertions(+), 21 deletions(-) diff --git a/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py b/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py index 6e219b9da..229107805 100644 --- a/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py +++ b/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py @@ -1,9 +1,11 @@ from django.core.management.base import BaseCommand -from django.db import transaction +from django.db import transaction, connection +import re import os import json import enum +import time from ...generators.graphs.generator import Graph as GraphGenerator from ...models.CodingSystem import CodingSystem @@ -89,18 +91,111 @@ def try_build(cls, builder_type, data): @classmethod def CODE_CATEGORIES(cls, data): """ - ICD-10 Disease Category builder + ICD-10 Disease Category builder test(s) """ + + ''' [!] 
Warning: This is only partially optimised ''' if not isinstance(data, list): return False, 'Invalid data type, expected list but got %s' % type(data) + # process nodes + nodes = [ ] + linkage = [ ] + result = [ ] + name_hashmap = { } + started = time.time() + + def create_linkage(parent, parent_index, children): + for child_data in children: + name = child_data.get('name').strip() + code = child_data.get('code').strip() + + # ICD-10 uses non-unique names, add code to vary them if required + if name in name_hashmap: + name = f'{name} ({code})' + name_hashmap[name] = True + + # Create child node and process descendants + node = ClinicalDiseaseCategoryNode(name=name, code=code) + index = len(nodes) + nodes.append(node) + linkage.append([parent_index, index]) + + descendants = child_data.get('children') + child_count = len(descendants) if isinstance(descendants, list) else 0 + result.append(f'\t\tChildNode') + + if isinstance(descendants, list): + create_linkage(node, index, descendants) - '''! 
TODO !''' - # Need to process code categories data + for root_data in data: + # clean up the section name(s) + root_name = root_data.get('name').strip() + matched_code = re.search(r'(\b(?=[a-zA-Z\d]+)[a-zA-Z]*\d[a-zA-Z\d]*-\b(?=[A-Z\d]+)[a-zA-Z]*\d[a-zA-Z\d]*)', root_name) + root_name = re.sub(r'\((\b(?=[a-zA-Z\d]+)[a-zA-Z]*\d[a-zA-Z\d]*-\b(?=[A-Z\d]+)[a-zA-Z]*\d[a-zA-Z\d]*)\)', '', root_name).strip() + derived_code = matched_code.group() if matched_code else None - return False, data + # process node and its branches + root = ClinicalDiseaseCategoryNode(name=root_name, code=derived_code) + index = len(nodes) + nodes.append(root) + + children = root_data.get('sections') + result.append(f'\tRootNode') + + create_linkage(root, index, children) + + # bulk create nodes & children + nodes = ClinicalDiseaseCategoryNode.objects.bulk_create(nodes) + + # bulk create edges + ClinicalDiseaseCategoryNode.children.through.objects.bulk_create( + [ + # list comprehension here is required because we need to match the instance(s) + ClinicalDiseaseCategoryNode.children.through( + name=f'{nodes[link[0]].name} | {nodes[link[1]].name}', + parent=nodes[link[0]], + child=nodes[link[1]] + ) + for link in linkage + ], + batch_size=7000 + ) + + # update coding system and apply related code + icd_10_id = CodingSystem.objects.get(name='ICD10 codes').id + + with connection.cursor() as cursor: + ''' [!] 
Note: We could probably optimise this ''' + + sql = """ + -- update matched values + update public.clinicalcode_clinicaldiseasecategorynode as trg + set coding_system_id = %(coding_id)s, + code_id = src.code_id + from ( + select node.id as node_id, + code.id as code_id + from public.clinicalcode_clinicaldiseasecategorynode as node + join public.clinicalcode_icd10_codes_and_titles_and_metadata as code + on node.code = code.code + ) src + where trg.id = src.node_id; + + -- update null values + update public.clinicalcode_clinicaldiseasecategorynode as trg + set coding_system_id = %(coding_id)s + where coding_system_id is null; + """ + cursor.execute(sql, { 'coding_id': icd_10_id }) + + # create result string for log + elapsed = (time.time() - started) + result = 'Created Nodes {\n%s\n}' % (icd_10_id, elapsed, '\n'.join(result)) + + return True, result ###################################################### @@ -119,7 +214,7 @@ class Command(BaseCommand): def __get_log_style(self, style): """ Returns the BaseCommand's log style - + See ref @ https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/#django.core.management.BaseCommand.style """ @@ -248,6 +343,11 @@ def __try_build_dag(self, filepath): self.__log(f'Error occurred when processing File<{filepath}> via BuilderType<{builder_type.name}>:\n\t{result}', LogType.ERROR) return + self.__log('Building Graph from File<%s> was completed successfully' % filepath, LogType.SUCCESS) + + if isinstance(result, str): + self.__log(result, LogType.SUCCESS) + def __generate_debug_dag(self): """ Responsible for generating a debug dag using the graph generators & its utility methods diff --git a/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py index 1b7d4a285..202a777a9 100644 --- a/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py +++ 
b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.1.10 on 2024-01-03 13:01 +# Generated by Django 4.1.10 on 2024-01-03 18:14 from django.db import migrations, models import django.db.models.deletion @@ -15,7 +15,7 @@ class Migration(migrations.Migration): name='ClinicalDiseaseCategoryEdge', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(max_length=255, unique=True)), + ('name', models.CharField(max_length=1024, unique=True)), ], options={ 'abstract': False, @@ -25,9 +25,9 @@ class Migration(migrations.Migration): name='ClinicalDiseaseCategoryNode', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(max_length=255)), - ('code', models.CharField(max_length=255)), - ('code_id', models.IntegerField(null=True)), + ('name', models.CharField(max_length=510)), + ('code', models.CharField(blank=True, max_length=255, null=True)), + ('code_id', models.IntegerField(blank=True, null=True)), ('children', models.ManyToManyField(blank=True, related_name='parents', through='clinicalcode.ClinicalDiseaseCategoryEdge', to='clinicalcode.clinicaldiseasecategorynode')), ('coding_system', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='disease_categories', to='clinicalcode.codingsystem')), ], diff --git a/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py b/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py index ef11bf5d2..61e78afe0 100644 --- a/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py +++ b/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py @@ -5,20 +5,20 @@ from .CodingSystem import CodingSystem class ClinicalDiseaseCategoryEdge(edge_factory('ClinicalDiseaseCategoryNode', 
concrete=False)): - name = models.CharField(max_length=255, unique=True) + name = models.CharField(max_length=1024, unique=True) def __str__(self): return self.name def save(self, *args, **kwargs): - self.name = f"{self.parent.name} {self.child.name}" + self.name = f'{self.parent.name} {self.child.name}' super().save(*args, **kwargs) class ClinicalDiseaseCategoryNode(node_factory(ClinicalDiseaseCategoryEdge)): - name = models.CharField(max_length=255) - code = models.CharField(max_length=255) - coding_system = models.ForeignKey(CodingSystem, on_delete=models.SET_NULL, related_name='disease_categories', null=True, blank=True) - code_id = models.IntegerField(null=True) + name = models.CharField(max_length=510) + code = models.CharField(max_length=255, null=True, blank=True) + coding_system = models.ForeignKey(CodingSystem, on_delete=models.SET_NULL, related_name='disease_categories', null=True, blank=True) # models.IntegerField(null=True, blank=True) + code_id = models.IntegerField(null=True, blank=True) def __str__(self): return self.name @@ -31,10 +31,9 @@ def save(self, *args, **kwargs): try: comparators = [ desired_code, desired_code.replace('.', '') ] - coding_system = CodingSystem.objects.get(codingsystem_id=desired_system) - table_name = coding_system.table_name - model_name = coding_system.table_name.replace('clinicalcode_', '') - codes_name = coding_system.coding_system.code_column_name.lower() + table_name = desired_system.table_name + model_name = desired_system.table_name.replace('clinicalcode_', '') + codes_name = desired_system.code_column_name.lower() query = """ select * @@ -48,6 +47,7 @@ def save(self, *args, **kwargs): code = code.first() if code.exists() else None except: self.code_id = None + pass else: if code is not None: self.code_id = code.pk From 03a0b3f662a7f47d72434f2f61a2e88c05fc5706 Mon Sep 17 00:00:00 2001 From: JackScanlon Date: Wed, 3 Jan 2024 20:24:12 +0000 Subject: [PATCH 03/45] Fix formatting --- 
.../clinicalcode/models/ClinicalDiseaseCategory.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py b/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py index 61e78afe0..eef65b797 100644 --- a/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py +++ b/CodeListLibrary_project/clinicalcode/models/ClinicalDiseaseCategory.py @@ -17,8 +17,8 @@ def save(self, *args, **kwargs): class ClinicalDiseaseCategoryNode(node_factory(ClinicalDiseaseCategoryEdge)): name = models.CharField(max_length=510) code = models.CharField(max_length=255, null=True, blank=True) - coding_system = models.ForeignKey(CodingSystem, on_delete=models.SET_NULL, related_name='disease_categories', null=True, blank=True) # models.IntegerField(null=True, blank=True) code_id = models.IntegerField(null=True, blank=True) + coding_system = models.ForeignKey(CodingSystem, on_delete=models.SET_NULL, related_name='disease_categories', null=True, blank=True) def __str__(self): return self.name @@ -38,12 +38,10 @@ def save(self, *args, **kwargs): query = """ select * from public.%(table_name)s - where lower(%(column_name)s) - """ % { 'table_name': table_name, 'column_name': codes_name } + where lower(%(column_name)s)""" % { 'table_name': table_name, 'column_name': codes_name } codes = apps.get_model(app_label='clinicalcode', model_name=model_name) code = codes.objects.raw(query + ' = ANY(%(values)s::text[])', { 'values': comparators }) - code = code.first() if code.exists() else None except: self.code_id = None From 7e7363a76809eb9f1ff5f94e0c46f20a2a7c6fbc Mon Sep 17 00:00:00 2001 From: JackScanlon Date: Tue, 30 Jan 2024 13:25:59 +0000 Subject: [PATCH 04/45] Post-impl. 
anatomical & speciality builders; along with changes to phecode phenotype --- CodeListLibrary_project/clinicalcode/admin.py | 12 ++ .../management/commands/dag_tasks.py | 143 +++++++++++++++++- ...clinicalanatomicalcategoryedge_and_more.py | 111 ++++++++++++++ ...11_clinicaldiseasecategoryedge_and_more.py | 48 ------ .../models/ClinicalAnatomicalCategory.py | 20 +++ .../models/ClinicalSpecialityCategory.py | 19 +++ .../dynamic_templates/atlas_phecode.json | 19 +-- .../clinical_coded_phenotype.json | 2 +- .../structured_data_algorithm_phenotype.json | 2 +- .../dynamic_templates/working_set.json | 2 +- 10 files changed, 310 insertions(+), 68 deletions(-) create mode 100644 CodeListLibrary_project/clinicalcode/migrations/0111_clinicalanatomicalcategoryedge_and_more.py delete mode 100644 CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py create mode 100644 CodeListLibrary_project/clinicalcode/models/ClinicalAnatomicalCategory.py create mode 100644 CodeListLibrary_project/clinicalcode/models/ClinicalSpecialityCategory.py diff --git a/CodeListLibrary_project/clinicalcode/admin.py b/CodeListLibrary_project/clinicalcode/admin.py index 55b32b666..22af45eeb 100644 --- a/CodeListLibrary_project/clinicalcode/admin.py +++ b/CodeListLibrary_project/clinicalcode/admin.py @@ -8,6 +8,8 @@ from .models.GenericEntity import GenericEntity from .models.Template import Template from .models.ClinicalDiseaseCategory import ClinicalDiseaseCategoryNode +from .models.ClinicalAnatomicalCategory import ClinicalAnatomicalCategoryNode +from .models.ClinicalSpecialityCategory import ClinicalSpecialityCategoryNode from .forms.TemplateForm import TemplateAdminForm from .forms.EntityClassForm import EntityAdminForm @@ -16,6 +18,16 @@ class ClinicalDiseaseCategoryNode(admin.ModelAdmin): list_display = ['id', 'name', 'code', 'coding_system', 'code_id'] +@admin.register(ClinicalSpecialityCategoryNode) +class ClinicalSpecialityCategoryNode(admin.ModelAdmin): + 
list_display = ['id', 'name'] + + +@admin.register(ClinicalAnatomicalCategoryNode) +class ClinicalAnatomicalCategoryNode(admin.ModelAdmin): + list_display = ['id', 'name', 'atlas_id'] + + @admin.register(CodingSystemFilter) class CodingSystemFilterAdmin(admin.ModelAdmin): list_display = ['coding_system', 'id', 'type'] diff --git a/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py b/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py index 229107805..4e1bbe7d2 100644 --- a/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py +++ b/CodeListLibrary_project/clinicalcode/management/commands/dag_tasks.py @@ -8,8 +8,11 @@ import time from ...generators.graphs.generator import Graph as GraphGenerator + from ...models.CodingSystem import CodingSystem from ...models.ClinicalDiseaseCategory import ClinicalDiseaseCategoryEdge, ClinicalDiseaseCategoryNode +from ...models.ClinicalAnatomicalCategory import ClinicalAnatomicalCategoryEdge, ClinicalAnatomicalCategoryNode +from ...models.ClinicalSpecialityCategory import ClinicalSpecialityCategoryEdge, ClinicalSpecialityCategoryNode ###################################################### @@ -43,6 +46,8 @@ class GraphType(int, enum.Enum, metaclass=IterableMeta): """ CODE_CATEGORIES = 0 + ANATOMICAL_CATEGORIES = 1 + SPECIALITY_CATEGORIES = 2 class LogType(int, enum.Enum, metaclass=IterableMeta): """ @@ -93,6 +98,14 @@ def CODE_CATEGORIES(cls, data): """ ICD-10 Disease Category builder test(s) + Note: + + ICD-10 codes were scraped from the ICD-10 classification website, + and matched with the Atlas phecodes + + This builder generates a DAG of ICD-10 codes, matched with the codes + within our database and selects the appropriate CodingSystem + """ ''' [!] 
Warning: This is only partially optimised ''' @@ -101,8 +114,8 @@ def CODE_CATEGORIES(cls, data): # process nodes nodes = [ ] - linkage = [ ] result = [ ] + linkage = [ ] name_hashmap = { } started = time.time() @@ -124,13 +137,13 @@ def create_linkage(parent, parent_index, children): descendants = child_data.get('children') child_count = len(descendants) if isinstance(descendants, list) else 0 - result.append(f'\t\tChildNode') + result.append(f'\t\tChildDiseaseNode') if isinstance(descendants, list): create_linkage(node, index, descendants) for root_data in data: - # clean up the section name(s) + # clean up the section name(s) from scraped data root_name = root_data.get('name').strip() matched_code = re.search(r'(\b(?=[a-zA-Z\d]+)[a-zA-Z]*\d[a-zA-Z\d]*-\b(?=[A-Z\d]+)[a-zA-Z]*\d[a-zA-Z\d]*)', root_name) @@ -143,7 +156,7 @@ def create_linkage(parent, parent_index, children): nodes.append(root) children = root_data.get('sections') - result.append(f'\tRootNode') + result.append(f'\tRootDiseaseNode') create_linkage(root, index, children) @@ -168,7 +181,7 @@ def create_linkage(parent, parent_index, children): icd_10_id = CodingSystem.objects.get(name='ICD10 codes').id with connection.cursor() as cursor: - ''' [!] Note: We could probably optimise this ''' + ''' [!] Note: We could probably optimise this? ''' sql = """ -- update matched values @@ -193,7 +206,121 @@ def create_linkage(parent, parent_index, children): # create result string for log elapsed = (time.time() - started) - result = 'Created Nodes {\n%s\n}' % (icd_10_id, elapsed, '\n'.join(result)) + result = 'Created DiseaseNodes {\n%s\n}' % (icd_10_id, elapsed, '\n'.join(result)) + + return True, result + + @classmethod + def ANATOMICAL_CATEGORIES(cls, data): + """ + Anatomical category builder + + Note: + + Currently, there are no known links between anatomical categories + provided by the Atlas dataset. + + As such, this method creates a tree without any children. 
+ + """ + + if not isinstance(data, list): + return False, 'Invalid data type, expected list but got %s' % type(data) + + # process nodes + nodes = [ ] + result = [ ] + started = time.time() + + for root_node in data: + node_id = root_node.get('id') + node_name = root_node.get('name') + + if not isinstance(node_id, int) or not isinstance(node_name, str): + err = 'Failed to create Node, expected but got Node' \ + % (type(node_id), type(node_name)) + return False, err + + node = ClinicalAnatomicalCategoryNode(atlas_id=node_id, name=node_name.strip()) + nodes.append(node) + result.append(f'\tAnatomicalRootNode') + + # bulk create nodes + nodes = ClinicalAnatomicalCategoryNode.objects.bulk_create(nodes) + + # create result string for log + elapsed = (time.time() - started) + result = 'Created AnatomicalNodes {\n%s\n}' % (elapsed, len(nodes), '\n'.join(result)) + + return True, result + + @classmethod + def SPECIALITY_CATEGORIES(cls, data): + """ + Clinical domain builder + + Note: + + This speciality data was scraped from the Atlas datasources, + it creates a tree hierarchy of clinical specialities and subspecialities + + DAG required as there are some specialities with overlap, e.g.: + + - Pre-hospital Emergency Medicine as a child of Anaesthetics, ICM and EM + - Paediatric Intensive Care Medicine as a child of Paediatrics and ICM + + """ + + if not isinstance(data, dict): + return False, 'Invalid data type, expected list but got %s' % type(data) + + # process nodes + nodes = [ ] + result = [ ] + linkage = [ ] + started = time.time() + + for root_key, children in data.items(): + root_name = root_key.strip() + root_node = ClinicalSpecialityCategoryNode(name=root_name) + + root_index = len(nodes) + nodes.append(root_node) + result.append(f'\tSpecialityRootNode') + + if len(children) > 0: + for child_key in children: + child_name = child_key.strip() + + related_index = next((i for i, e in enumerate(nodes) if e.name == child_name), None) + if related_index is None: +
related_index = len(nodes) + child = ClinicalSpecialityCategoryNode(name=child_name) + nodes.append(child) + + linkage.append([root_index, related_index]) + result.append(f'\t\tChildSpecialityNode') + + # bulk create nodes & children + nodes = ClinicalSpecialityCategoryNode.objects.bulk_create(nodes) + + # bulk create edges + ClinicalSpecialityCategoryNode.children.through.objects.bulk_create( + [ + # list comprehension here is required because we need to match the instance(s) + ClinicalSpecialityCategoryNode.children.through( + name=f'{nodes[link[0]].name} | {nodes[link[1]].name}', + parent=nodes[link[0]], + child=nodes[link[1]] + ) + for link in linkage + ], + batch_size=7000 + ) + + # create result string for log + elapsed = (time.time() - started) + result = 'Created SpecialityNodes {\n%s\n}' % (elapsed, '\n'.join(result)) return True, result @@ -346,7 +473,7 @@ def __try_build_dag(self, filepath): self.__log('Building Graph from File<%s> was completed successfully' % filepath, LogType.SUCCESS) if isinstance(result, str): - self.__log(result, LogType.SUCCESS) + self.__log_to_file(result, LogType.SUCCESS) def __generate_debug_dag(self): """ @@ -406,7 +533,7 @@ def handle(self, *args, **kwargs): # det. 
handle self._verbose = verbose - self._log_dir = log_file + self._log_dir = log_file if isinstance(log_file, str) and len(log_file) > 0 else None if is_debug: self.__generate_debug_dag() diff --git a/CodeListLibrary_project/clinicalcode/migrations/0111_clinicalanatomicalcategoryedge_and_more.py b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicalanatomicalcategoryedge_and_more.py new file mode 100644 index 000000000..da3b19152 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicalanatomicalcategoryedge_and_more.py @@ -0,0 +1,111 @@ +# Generated by Django 4.1.10 on 2024-01-30 13:17 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('clinicalcode', '0110_read_cd_cv2_scd_readv2_defn_ln_gin_idx_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='ClinicalAnatomicalCategoryEdge', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=1024, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ClinicalDiseaseCategoryEdge', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=1024, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ClinicalSpecialityCategoryEdge', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=1024, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='ClinicalSpecialityCategoryNode', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=510, unique=True)), + ('children', 
models.ManyToManyField(blank=True, related_name='parents', through='clinicalcode.ClinicalSpecialityCategoryEdge', to='clinicalcode.clinicalspecialitycategorynode')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='clinicalspecialitycategoryedge', + name='child', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='parent_edges', to='clinicalcode.clinicalspecialitycategorynode'), + ), + migrations.AddField( + model_name='clinicalspecialitycategoryedge', + name='parent', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='children_edges', to='clinicalcode.clinicalspecialitycategorynode'), + ), + migrations.CreateModel( + name='ClinicalDiseaseCategoryNode', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=510)), + ('code', models.CharField(blank=True, max_length=255, null=True)), + ('code_id', models.IntegerField(blank=True, null=True)), + ('children', models.ManyToManyField(blank=True, related_name='parents', through='clinicalcode.ClinicalDiseaseCategoryEdge', to='clinicalcode.clinicaldiseasecategorynode')), + ('coding_system', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='disease_categories', to='clinicalcode.codingsystem')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='clinicaldiseasecategoryedge', + name='child', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='parent_edges', to='clinicalcode.clinicaldiseasecategorynode'), + ), + migrations.AddField( + model_name='clinicaldiseasecategoryedge', + name='parent', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='children_edges', to='clinicalcode.clinicaldiseasecategorynode'), + ), + migrations.CreateModel( + name='ClinicalAnatomicalCategoryNode', + 
fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=510, unique=True)), + ('atlas_id', models.IntegerField(blank=True, null=True, unique=True)), + ('children', models.ManyToManyField(blank=True, related_name='parents', through='clinicalcode.ClinicalAnatomicalCategoryEdge', to='clinicalcode.clinicalanatomicalcategorynode')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='clinicalanatomicalcategoryedge', + name='child', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='parent_edges', to='clinicalcode.clinicalanatomicalcategorynode'), + ), + migrations.AddField( + model_name='clinicalanatomicalcategoryedge', + name='parent', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='children_edges', to='clinicalcode.clinicalanatomicalcategorynode'), + ), + ] diff --git a/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py b/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py deleted file mode 100644 index 202a777a9..000000000 --- a/CodeListLibrary_project/clinicalcode/migrations/0111_clinicaldiseasecategoryedge_and_more.py +++ /dev/null @@ -1,48 +0,0 @@ -# Generated by Django 4.1.10 on 2024-01-03 18:14 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('clinicalcode', '0110_read_cd_cv2_scd_readv2_defn_ln_gin_idx_and_more'), - ] - - operations = [ - migrations.CreateModel( - name='ClinicalDiseaseCategoryEdge', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(max_length=1024, unique=True)), - ], - options={ - 'abstract': False, - }, - ), - migrations.CreateModel( - name='ClinicalDiseaseCategoryNode', - 
fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(max_length=510)), - ('code', models.CharField(blank=True, max_length=255, null=True)), - ('code_id', models.IntegerField(blank=True, null=True)), - ('children', models.ManyToManyField(blank=True, related_name='parents', through='clinicalcode.ClinicalDiseaseCategoryEdge', to='clinicalcode.clinicaldiseasecategorynode')), - ('coding_system', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='disease_categories', to='clinicalcode.codingsystem')), - ], - options={ - 'abstract': False, - }, - ), - migrations.AddField( - model_name='clinicaldiseasecategoryedge', - name='child', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='parent_edges', to='clinicalcode.clinicaldiseasecategorynode'), - ), - migrations.AddField( - model_name='clinicaldiseasecategoryedge', - name='parent', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='children_edges', to='clinicalcode.clinicaldiseasecategorynode'), - ), - ] diff --git a/CodeListLibrary_project/clinicalcode/models/ClinicalAnatomicalCategory.py b/CodeListLibrary_project/clinicalcode/models/ClinicalAnatomicalCategory.py new file mode 100644 index 000000000..0a7caf54e --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/models/ClinicalAnatomicalCategory.py @@ -0,0 +1,20 @@ +from django.db import models +from django.apps import apps +from django_postgresql_dag.models import node_factory, edge_factory + +class ClinicalAnatomicalCategoryEdge(edge_factory('ClinicalAnatomicalCategoryNode', concrete=False)): + name = models.CharField(max_length=1024, unique=True) + + def __str__(self): + return self.name + + def save(self, *args, **kwargs): + self.name = f'{self.parent.name} {self.child.name}' + super().save(*args, **kwargs) + +class 
ClinicalAnatomicalCategoryNode(node_factory(ClinicalAnatomicalCategoryEdge)): + name = models.CharField(max_length=510, unique=True) + atlas_id = models.IntegerField(blank=True, null=True, unique=True) + + def __str__(self): + return self.name diff --git a/CodeListLibrary_project/clinicalcode/models/ClinicalSpecialityCategory.py b/CodeListLibrary_project/clinicalcode/models/ClinicalSpecialityCategory.py new file mode 100644 index 000000000..fb78ead12 --- /dev/null +++ b/CodeListLibrary_project/clinicalcode/models/ClinicalSpecialityCategory.py @@ -0,0 +1,19 @@ +from django.db import models +from django.apps import apps +from django_postgresql_dag.models import node_factory, edge_factory + +class ClinicalSpecialityCategoryEdge(edge_factory('ClinicalSpecialityCategoryNode', concrete=False)): + name = models.CharField(max_length=1024, unique=True) + + def __str__(self): + return self.name + + def save(self, *args, **kwargs): + self.name = f'{self.parent.name} {self.child.name}' + super().save(*args, **kwargs) + +class ClinicalSpecialityCategoryNode(node_factory(ClinicalSpecialityCategoryEdge)): + name = models.CharField(max_length=510, unique=True) + + def __str__(self): + return self.name diff --git a/CodeListLibrary_project/dynamic_templates/atlas_phecode.json b/CodeListLibrary_project/dynamic_templates/atlas_phecode.json index ad772c481..aec97e555 100644 --- a/CodeListLibrary_project/dynamic_templates/atlas_phecode.json +++ b/CodeListLibrary_project/dynamic_templates/atlas_phecode.json @@ -16,7 +16,7 @@ { "title": "Overview", "description": "An overview of the phenotype with basic metadata.", - "fields": ["type", "coding_system", "data_sources", "collections", "tags", "source_reference"] + "fields": ["type", "coding_system", "data_sources", "collections", "tags"] }, { "title": "Definition", @@ -37,9 +37,10 @@ "hide_if_empty": true }, { - "title": "Phecode", - "description": "The associated clinical code", - "fields": ["phecode"] + "title": "Phecodes", + 
"documentation": "clinical-coded-phenotype-docs", + "description": "Clinical codes used to define this phenotype.", + "fields": ["concept_information"] }, { "title": "Publication", @@ -113,13 +114,13 @@ }, "hide_on_create": true }, - "phecode": { - "title": "Phecode", - "description": "The clinical code that captures this clinical concept", - "field_type": "phecode", + "concept_information": { + "title": "Phecodes", + "description": "A set of concepts, each of which defines a list of Phecodes derived from clinical codes.", + "field_type": "concept_information", "active": true, "validation": { - "type": "phecode", + "type": "concept", "mandatory": false, "has_children": true } diff --git a/CodeListLibrary_project/dynamic_templates/clinical_coded_phenotype.json b/CodeListLibrary_project/dynamic_templates/clinical_coded_phenotype.json index 9478c527f..794866ae9 100644 --- a/CodeListLibrary_project/dynamic_templates/clinical_coded_phenotype.json +++ b/CodeListLibrary_project/dynamic_templates/clinical_coded_phenotype.json @@ -219,4 +219,4 @@ "hide_if_empty": true } } -} \ No newline at end of file +} diff --git a/CodeListLibrary_project/dynamic_templates/structured_data_algorithm_phenotype.json b/CodeListLibrary_project/dynamic_templates/structured_data_algorithm_phenotype.json index e088a9af5..c9fcd112a 100644 --- a/CodeListLibrary_project/dynamic_templates/structured_data_algorithm_phenotype.json +++ b/CodeListLibrary_project/dynamic_templates/structured_data_algorithm_phenotype.json @@ -157,4 +157,4 @@ "hide_if_empty": true } } -} \ No newline at end of file +} diff --git a/CodeListLibrary_project/dynamic_templates/working_set.json b/CodeListLibrary_project/dynamic_templates/working_set.json index 4a70dcfd9..7270207de 100644 --- a/CodeListLibrary_project/dynamic_templates/working_set.json +++ b/CodeListLibrary_project/dynamic_templates/working_set.json @@ -68,4 +68,4 @@ "hide_if_empty": true } } -} \ No newline at end of file +} From 
1aced7389a85861d7dffac74578e55e9ccb75ccc Mon Sep 17 00:00:00 2001 From: JackScanlon Date: Mon, 26 Feb 2024 10:10:58 +0000 Subject: [PATCH 05/45] Merge Development --- .github/workflows/testing-pipline.yml | 33 +- CodeListLibrary_project/clinicalcode/admin.py | 1 + .../entity_utils/concept_utils.py | 271 ++++++------ .../clinicalcode/entity_utils/constants.py | 17 +- .../clinicalcode/entity_utils/create_utils.py | 3 +- .../clinicalcode/entity_utils/search_utils.py | 402 ++++++++++++++---- .../clinicalcode/entity_utils/stats_utils.py | 31 +- .../entity_utils/template_utils.py | 87 +++- ...clinicalanatomicalcategoryedge_and_more.py | 111 ----- ...toricaltemplate_hide_on_create_and_more.py | 23 + .../clinicalcode/migrations/0112_dmd_codes.py | 27 ++ .../clinicalcode/models/DMD_CODES.py | 11 + .../clinicalcode/models/Template.py | 1 + CodeListLibrary_project/clinicalcode/tasks.py | 11 +- .../clinicalcode/templatetags/cl_extras.py | 47 ++ .../templatetags/detail_pg_renderer.py | 28 +- .../templatetags/entity_renderer.py | 6 +- .../clinicalcode/tests/testing_playbook.md | 4 + .../clinicalcode/views/GenericEntity.py | 4 +- .../clinicalcode/views/View.py | 26 +- .../clinicalcode/views/adminTemp.py | 1 - CodeListLibrary_project/cll/settings.py | 32 +- .../js/clinicalcode/components/stepsWizard.js | 25 +- .../forms/clinical/conceptCreator.js | 115 ++++- .../js/clinicalcode/forms/entityCreator.js | 54 ++- .../forms/stringInputListCreator.js | 203 +++++++++ .../services/conceptSelectionService.js | 19 +- .../cll/static/js/clinicalcode/utils.js | 34 ++ .../cll/static/js/lib/xlsx.mini.min.js | 10 + .../cll/static/scss/components/_inputs.scss | 4 + .../cll/static/scss/pages/detail.scss | 33 +- .../views/clinical-coded-phenotype-docs.html | 45 +- .../generic_entity/creation/create.html | 9 + .../generic_entity/creation/select.html | 4 + .../generic_entity/detail/detail.html | 60 +-- .../generic_entity/search/search.html | 8 +- .../create/inputs/clinical/concept.html | 2 +- 
.../components/create/inputs/markdown.html | 2 +- .../create/inputs/string_inputlist.html | 38 ++ .../components/details/outputs/inputbox.html | 6 +- .../details/outputs/source_reference.html | 16 + .../details/outputs/string_inputlist.html | 23 + .../components/details/outputs/url_list.html | 26 ++ .../components/search/cards/generic.html | 2 +- .../cll/templates/drf-yasg/swagger-ui.html | 11 +- .../OpenCodelists_phenotype.json | 187 ++++++++ docker/development/app.Dockerfile | 2 +- docker/production/scripts/init-app.sh | 27 +- docker/test/env/app.compose.env | 1 + docs/sql-scripts/examine_codelist.sql | 177 +++++--- docs/sql-scripts/examine_codelist_sizes.sql | 163 +++++++ 51 files changed, 1965 insertions(+), 518 deletions(-) delete mode 100644 CodeListLibrary_project/clinicalcode/migrations/0111_clinicalanatomicalcategoryedge_and_more.py create mode 100644 CodeListLibrary_project/clinicalcode/migrations/0111_historicaltemplate_hide_on_create_and_more.py create mode 100644 CodeListLibrary_project/clinicalcode/migrations/0112_dmd_codes.py create mode 100644 CodeListLibrary_project/clinicalcode/models/DMD_CODES.py create mode 100644 CodeListLibrary_project/cll/static/js/clinicalcode/forms/stringInputListCreator.js create mode 100644 CodeListLibrary_project/cll/static/js/lib/xlsx.mini.min.js create mode 100644 CodeListLibrary_project/cll/templates/components/create/inputs/string_inputlist.html create mode 100644 CodeListLibrary_project/cll/templates/components/details/outputs/source_reference.html create mode 100644 CodeListLibrary_project/cll/templates/components/details/outputs/string_inputlist.html create mode 100644 CodeListLibrary_project/cll/templates/components/details/outputs/url_list.html create mode 100644 CodeListLibrary_project/dynamic_templates/OpenCodelists_phenotype.json create mode 100644 docs/sql-scripts/examine_codelist_sizes.sql diff --git a/.github/workflows/testing-pipline.yml b/.github/workflows/testing-pipline.yml index ebcaf730e..39d8fb837 
100644 --- a/.github/workflows/testing-pipline.yml +++ b/.github/workflows/testing-pipline.yml @@ -3,10 +3,12 @@ name: Integration-test on: push: branches: - - main + - master paths: - "CodeListLibrary_project/**" pull_request: + branches: + - master paths: - "CodeListLibrary_project/**" @@ -115,8 +117,8 @@ jobs: if: (success() || failure()) && github.event_name != 'pull_request' continue-on-error: true with: - ref: gh-pages - path: gh-pages + ref: testing-result-pages + path: testing-result-pages - name: Allure report action # Step to generate Allure report if: (success() || failure()) && github.event_name != 'pull_request' @@ -124,22 +126,35 @@ jobs: with: allure_results: CodeListLibrary_project/clinicalcode/tests/allure-results allure_report: CodeListLibrary_project/clinicalcode/tests/allure-report - allure_history: allure-history - gh_pages: gh-pages - keep_reports: 10 # Specify the number of previous reports to keep + allure_history: allure-history + gh_pages: testing-result-pages + report_url: https://meek-florentine-a03d43.netlify.app + keep_reports: 15 # Specify the number of previous reports to keep - - name: Upload an artifact for GitHub Pages + - name: Upload an artifact files for record uses: actions/upload-pages-artifact@v2 if: (success() || failure()) && github.event_name != 'pull_request' with: name: allure-report path: CodeListLibrary_project/clinicalcode/tests/allure-report + + - name: Publish to Netlify + uses: netlify/actions/cli@master + if: (success() || failure()) && github.event_name != 'pull_request' + with: + args: deploy --dir=allure-history --prod --functions=functions + env: + NETLIFY_SITE_ID: ${{secrets.NETLIFY_SITE_ID}} + NETLIFY_AUTH_TOKEN: ${{secrets.NETLIFY_AUTH_TOKEN}} + timeout-minutes: 1 - name: Deploy report to gh-pages branch if: (success() || failure()) && github.event_name != 'pull_request' uses: peaceiris/actions-gh-pages@v2 env: PERSONAL_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PUBLISH_BRANCH: gh-pages + PUBLISH_BRANCH: 
testing-result-pages PUBLISH_DIR: allure-history - keep_files: true + with: + keepFiles: true + diff --git a/CodeListLibrary_project/clinicalcode/admin.py b/CodeListLibrary_project/clinicalcode/admin.py index 22af45eeb..2d833acf4 100644 --- a/CodeListLibrary_project/clinicalcode/admin.py +++ b/CodeListLibrary_project/clinicalcode/admin.py @@ -10,6 +10,7 @@ from .models.ClinicalDiseaseCategory import ClinicalDiseaseCategoryNode from .models.ClinicalAnatomicalCategory import ClinicalAnatomicalCategoryNode from .models.ClinicalSpecialityCategory import ClinicalSpecialityCategoryNode +from .models.DMD_CODES import DMD_CODES from .forms.TemplateForm import TemplateAdminForm from .forms.EntityClassForm import EntityAdminForm diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/concept_utils.py b/CodeListLibrary_project/clinicalcode/entity_utils/concept_utils.py index 541451416..3de48a1ee 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/concept_utils.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/concept_utils.py @@ -1,14 +1,10 @@ from django.db import connection -from django.db.models import F, Value, ForeignKey, Subquery, OuterRef +from django.db.models import ForeignKey from django.http.request import HttpRequest from ..models.Concept import Concept from ..models.PublishedConcept import PublishedConcept from ..models.ConceptReviewStatus import ConceptReviewStatus -from ..models.Component import Component -from ..models.CodeList import CodeList -from ..models.ConceptCodeAttribute import ConceptCodeAttribute -from ..models.Code import Code from . 
import model_utils, permission_utils from .constants import ( @@ -334,123 +330,136 @@ def get_concept_component_details(concept_id, concept_history_id, aggregate_code if not historical_concept: return None - # Find the associated components (or now, rulesets) given the concept and its historical date - components = Component.history.filter( - concept__id=historical_concept.id, - history_date__lte=historical_concept.history_date - ) \ - .annotate( - was_deleted=Subquery( - Component.history.filter( - id=OuterRef('id'), - concept__id=historical_concept.id, - history_date__lte=historical_concept.history_date, - history_type='-' + seen_codes = set([]) + components_data = [] + with connection.cursor() as cursor: + sql = ''' + with components as ( + select c0.id, + max(c0.history_id) as history_id + from public.clinicalcode_historicalcomponent as c0 + left join public.clinicalcode_historicalcomponent as c1 + on c1.id = c0.id + and c1.concept_id = %(hc_id)s + and c1.history_date <= %(hc_date)s::timestamptz + and c1.history_type = '-' + where c0.concept_id = %(hc_id)s + and c0.history_date <= %(hc_date)s::timestamptz + and c0.history_type <> '-' + and c1.id is null + group by c0.id + order by c0.id asc ) - .order_by('id', '-history_id') - .distinct('id') - .values('id') - ) - ) \ - .exclude(was_deleted__isnull=False) \ - .order_by('id', '-history_id') \ - .distinct('id') - if not components.exists(): - return None + select c1.* + from components as c0 + left join public.clinicalcode_historicalcomponent as c1 + on c0.id = c1.id + and c0.history_id = c1.history_id + ''' - components_data = [] - codelist_data = [] - seen_codes = set() - for component in components: - component_data = { - 'id': component.id, - 'name': component.name, - 'logical_type': CLINICAL_RULE_TYPE(component.logical_type).name, - 'source_type': CLINICAL_CODE_SOURCE(component.component_type).name, - 'source': component.source, - } + cursor.execute( + sql, + params={ 'hc_id': historical_concept.id, 
'hc_date': historical_concept.history_date } + ) - if include_source_data: - component_data |= { - 'used_description': component.used_description, - 'used_wildcard': component.used_wildcard, - 'was_wildcard_sensitive': component.was_wildcard_sensitive, + columns = [col[0] for col in cursor.description] + components = [dict(zip(columns, row)) for row in cursor.fetchall()] + + for component in components: + component_data = { + 'id': component.get('id'), + 'name': component.get('name'), + 'logical_type': CLINICAL_RULE_TYPE(component.get('logical_type')).name, + 'source_type': CLINICAL_CODE_SOURCE(component.get('component_type')).name, + 'source': component.get('source'), } - # Find the codelist associated with this component - codelist = CodeList.history.exclude(history_type='-') \ - .filter( - component__id=component.id, - history_date__lte=historical_concept.history_date - ) \ - .order_by('-history_date', '-history_id') + if include_source_data: + component_data |= { + 'used_description': component.get('used_description'), + 'used_wildcard': component.get('used_wildcard'), + 'was_wildcard_sensitive': component.get('was_wildcard_sensitive'), + } - if not codelist.exists(): - continue + if not include_codes and not aggregate_codes: + components_data.append(component_data) + continue - if include_codes or aggregate_codes: - codelist = codelist.first() - - # Find the codes associated with this codelist - codes = Code.history.filter( - code_list__id=codelist.id, - history_date__lte=historical_concept.history_date - ) \ - .annotate( - was_deleted=Subquery( - Code.history.filter( - id=OuterRef('id'), - code_list__id=codelist.id, - history_date__lte=historical_concept.history_date, - history_type='-' - ) - .order_by('code', '-history_id') - .distinct('code') - .values('id') + codes = [] + if attribute_headers is None: + sql = ''' + select code.id, + code.code, + code.description + from public.clinicalcode_historicalcodelist as codelist + join 
public.clinicalcode_historicalcode as code + on code.code_list_id = codelist.id + and code.history_date <= %(hc_date)s::timestamptz + left join public.clinicalcode_historicalcode as deletedcode + on deletedcode.id = code.id + and deletedcode.code_list_id = codelist.id + and deletedcode.history_date <= %(hc_date)s::timestamptz + and deletedcode.history_type = '-' + where codelist.component_id = %(hc_c_id)s + and codelist.history_date <= %(hc_date)s::timestamptz + and codelist.history_type <> '-' + and code.history_type <> '-' + and deletedcode.id is null + ''' + + cursor.execute( + sql, + params={ 'hc_c_id': component.get('id'), 'hc_date': historical_concept.history_date } ) - ) \ - .exclude(was_deleted__isnull=False) \ - .order_by('id', '-history_id') \ - .distinct('id') - component_data['code_count'] = codes.count() - - if attribute_headers is None: - # Add each code - codes = codes.values('id', 'code', 'description') - codes = list(codes) + columns = [col[0] for col in cursor.description] + codes = [dict(zip(columns, row)) for row in cursor.fetchall()] else: - # Annotate each code with its list of attribute values based on the code_attribute_header - codes = codes.annotate( - attributes=Subquery( - ConceptCodeAttribute.history.filter( - concept__id=historical_concept.id, - history_date__lte=historical_concept.history_date, - code=OuterRef('code') - ) - .annotate( - was_deleted=Subquery( - ConceptCodeAttribute.history.filter( - concept__id=historical_concept.id, - history_date__lte=historical_concept.history_date, - code=OuterRef('code'), - history_type='-' - ) - .order_by('code', '-history_id') - .distinct('code') - .values('id') - ) - ) - .exclude(was_deleted__isnull=False) - .order_by('id', '-history_id') - .distinct('id') - .values('attributes') - ) - ) \ - .values('id', 'code', 'description', 'attributes') - - codes = list(codes) + sql = ''' + select code.id, + code.code, + code.description, + attributes.attributes + from 
public.clinicalcode_historicalcodelist as codelist + join public.clinicalcode_historicalcode as code + on code.code_list_id = codelist.id + and code.history_date <= %(hc_date)s::timestamptz + left join public.clinicalcode_historicalcode as deleted_code + on deleted_code.id = code.id + and deleted_code.code_list_id = codelist.id + and deleted_code.history_date <= %(hc_date)s::timestamptz + and deleted_code.history_type = '-' + left join ( + select attr.* + from public.clinicalcode_historicalconceptcodeattribute as attr + left join public.clinicalcode_historicalconceptcodeattribute as deleted_attr + on deleted_attr.id = attr.id + and deleted_attr.history_type = '-' + and deleted_attr.history_date <= %(hc_date)s::timestamptz + where attr.concept_id = %(hc_id)s + and attr.history_date <= %(hc_date)s::timestamptz + and attr.history_type <> '-' + and deleted_attr.id is null + ) as attributes + on attributes.concept_id = %(hc_id)s + and attributes.history_date <= %(hc_date)s::timestamptz + and attributes.code = code.code + where codelist.component_id = %(hc_c_id)s + and codelist.history_date <= %(hc_date)s::timestamptz + and codelist.history_type <> '-' + and code.history_type <> '-' + and deleted_code.id is null + ''' + + cursor.execute( + sql, + params={ 'hc_c_id': component.get('id'), 'hc_id': historical_concept.id, 'hc_date': historical_concept.history_date } + ) + + columns = [col[0] for col in cursor.description] + codes = [dict(zip(columns, row)) for row in cursor.fetchall()] + if format_for_api: for code in codes: attributes = code.get('attributes') @@ -460,30 +469,20 @@ def get_concept_component_details(concept_id, concept_history_id, aggregate_code headers, attributes )) - # Append codes to component if required if include_codes: component_data['codes'] = codes - # Append aggregated codes if required if aggregate_codes: - codes = [ - seen_codes.add(obj.get('code')) or obj - for obj in codes - if obj.get('code') not in seen_codes - ] - codelist_data += codes + 
map(lambda obj: seen_codes.add(obj.get('code')) if obj.get('code') else None, codes) - components_data.append(component_data) + components_data.append(component_data) + + result = { 'components': components_data } if aggregate_codes: - return { - 'codelist': codelist_data, - 'components': components_data, - } + result.update({ 'codelist': list(seen_codes) }) - return { - 'components': components_data - } + return result def get_concept_codelist(concept_id, concept_history_id, incl_attributes=False): """ @@ -709,7 +708,7 @@ def get_minimal_concept_data(concept): ] # Clean coding system for top level field use - concept_data.pop('coding_system') + concept_data.pop('coding_system', None) # If userdata is requested, try to grab all related for field in concept._meta.fields: @@ -731,9 +730,9 @@ def get_minimal_concept_data(concept): # Clean data if required if not concept_data.get('is_deleted'): - concept_data.pop('is_deleted') - concept_data.pop('deleted_by') - concept_data.pop('deleted') + concept_data.pop('is_deleted', None) + concept_data.pop('deleted_by', None) + concept_data.pop('deleted', None) return { 'concept_id': concept.id, @@ -805,7 +804,7 @@ def get_clinical_concept_data(concept_id, concept_history_id, include_reviewed_c if not latest_version: latest_version = historical_concept - + concept_data['latest_version'] = { 'id': latest_version.id, 'history_id': latest_version.history_id, @@ -828,7 +827,7 @@ def get_clinical_concept_data(concept_id, concept_history_id, include_reviewed_c ] # Clean coding system for top level field use - concept_data.pop('coding_system') + concept_data.pop('coding_system', None) # If userdata is requested, try to grab all related if not remove_userdata: @@ -856,9 +855,9 @@ def get_clinical_concept_data(concept_id, concept_history_id, include_reviewed_c # Clean data if required if not concept_data.get('is_deleted'): - concept_data.pop('is_deleted') - concept_data.pop('deleted_by') - concept_data.pop('deleted') + 
concept_data.pop('is_deleted', None) + concept_data.pop('deleted_by', None) + concept_data.pop('deleted', None) # Build codelist and components from concept (modified by params) attribute_headers = concept_data.pop('code_attribute_header', None) if include_attributes else None diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/constants.py b/CodeListLibrary_project/clinicalcode/entity_utils/constants.py index 2fc506125..6c8c11b3c 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/constants.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/constants.py @@ -766,7 +766,6 @@ class FORM_METHODS(int, enum.Enum, metaclass=IterableMeta): 'input_type': 'inputbox', 'output_type': 'phenoflowid', }, - 'group_field': { 'input_type': 'group_select', }, @@ -776,7 +775,6 @@ class FORM_METHODS(int, enum.Enum, metaclass=IterableMeta): 'access_field_editable': { 'input_type': 'access_select_editable', }, - 'permissions_section': { 'system_defined': True, 'output_type': 'permissions' @@ -796,5 +794,18 @@ class FORM_METHODS(int, enum.Enum, metaclass=IterableMeta): 'history_id': { 'system_defined': True, 'output_type': 'history_id' - } + }, + 'string_inputlist': { + 'input_type': 'string_inputlist', + 'output_type': 'string_inputlist', + }, + 'url_list': { + 'input_type': 'string_inputlist', + 'output_type': 'url_list', + }, + 'source_reference': { + 'data_type': 'string', + 'input_type': 'inputbox', + 'output_type': 'source_reference' + }, } diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py b/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py index 657662042..204647503 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/create_utils.py @@ -45,6 +45,7 @@ def get_createable_entities(request): templates = Template.objects.filter( entity_class__id__in=entities.values_list('id', flat=True) ) \ + .exclude(hide_on_create=True) \ 
.values('id', 'template_version', 'entity_class__id', 'name', 'description') return { @@ -79,7 +80,7 @@ def get_template_creation_data(request, entity, layout, field, default=None): value = concept_utils.get_clinical_concept_data( item['concept_id'], item['concept_version_id'], - aggregate_component_codes=True, + aggregate_component_codes=False, derive_access_from=request, include_source_data=True, include_attributes=True diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/search_utils.py b/CodeListLibrary_project/clinicalcode/entity_utils/search_utils.py index 91949898b..765a1a895 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/search_utils.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/search_utils.py @@ -1,4 +1,5 @@ from django.apps import apps +from django.db import connection from django.db.models import Q from django.db.models.functions import Lower from django.db.models.expressions import RawSQL @@ -341,7 +342,7 @@ def apply_param_to_query(query, where, params, template, param, data, else: query[f'{param}'] = data return True - + return False def try_get_template_children(entity, default=None): @@ -377,23 +378,305 @@ def try_get_template_children(entity, default=None): field_type = template_utils.try_get_content(validation, 'type') if field_type == 'concept': child_data = concept_utils.get_concept_dataset(packet, field_name=field, default=None) - + if child_data is None: continue children = children + child_data return children + +def exclude_childless_entities(entities, template_fields, child_fields): + """ + Method to exclude entities from a HistoricalGenericEntity QuerySet + where rows do not contain valid children, e.g. 
in the case of a + Phenotype that lacks any associated Concepts and any child codes + + Args: + entities {QuerySet}: a HistoricalGenericEntity QuerySet + + template_fields {dict}: the template layout + + child_fields {str[]}: a list of field names that are considered + to contain children + + Returns: + QuerySet containing the results + of the exlusion + + """ + entity_ids = list(entities.values_list('id', flat=True)) + if len(entity_ids) < 1: + return entities + + history_ids = list(entities.values_list('history_id', flat=True)) + with connection.cursor() as cursor: + base = ''' + with entities as ( + select * + from public.clinicalcode_historicalgenericentity + where id = ANY(%(entity_ids)s) + and history_id = ANY(%(history_ids)s) + ) + ''' + + for field in child_fields: + template_field = template_utils.try_get_content(template_fields, field) + if template_field is None: + continue + + validation = template_utils.try_get_content(template_field, 'validation') + if validation is None: + continue + + field_type = template_utils.try_get_content(validation, 'type') + if field_type == 'concept': + sql = base + ''' + select entity.id, + entity.history_id + from entities as entity, + json_array_elements(entity.template_data::json->'concept_information') as concepts + join public.clinicalcode_historicalconcept as concept + on concept.id = cast(concepts->>'concept_id' as integer) + and concept.history_id = cast(concepts->>'concept_version_id' as integer) + join public.clinicalcode_codingsystem as codingsystem + on codingsystem.id = concept.coding_system_id + join public.clinicalcode_historicalcomponent as component + on component.concept_id = concept.id + and component.history_date <= concept.history_date + and component.logical_type = 1 + and component.history_type != '-' + join public.clinicalcode_historicalcodelist as codelist + on codelist.component_id = component.id + and codelist.history_date <= concept.history_date + and codelist.history_type != '-' + join 
public.clinicalcode_historicalcode as code + on code.code_list_id = codelist.id + and code.history_date <= concept.history_date + and code.history_type != '-' + group by entity.id, + entity.history_id + ''' + + cursor.execute( + sql, + params={ 'entity_ids': entity_ids, 'history_ids': history_ids } + ) + + results = cursor.fetchall() + rowcount = len(results) + if rowcount < 1: + entity_ids = [] + history_ids = [] + break + + entity_ids = [None]*rowcount + history_ids = [None]*rowcount + for index, row in enumerate(results): + entity_ids[index] = row[0] + history_ids[index] = row[1] + + if len(entity_ids) > 0: + return entities.filter( + id__in=entity_ids, + history_id__in=history_ids + ) + + return GenericEntity.history.none() + +def try_search_child_concepts(entities, search=None, order_clause=None): + """ + Method to collect concept children of a HistoricalGenericEntity QuerySet + + [!] NOTE: + 1. `order_clause` is unsafe: + Do not allow unknown inputs, only allow those defined + within *entity_utils/constants.py* + + 2. 
This method ignores permissions: + It should only be called from a method that + has previously considered accessibility + + Args: + entities {QuerySet}: a HistoricalGenericEntity QuerySet + + search {str | None}: an optional search parameter + + order_clause {str | None}: an optional order clause + + Returns: + Either (a) dict[] array containing the results; + or (b) a null value + + """ + results = None + with connection.cursor() as cursor: + sql = '' + if not gen_utils.is_empty_string(search): + sql = ''' + with + entities as ( + select *, + cast(regexp_replace(id, '[a-zA-Z]+', '') as integer) as true_id, + ts_rank_cd( + hge.search_vector, + websearch_to_tsquery('pg_catalog.english', %(searchterm)s) + ) as score + from public.clinicalcode_historicalgenericentity as hge + where id = ANY(%(entity_ids)s) + and history_id = ANY(%(history_ids)s) + and hge.search_vector @@ to_tsquery( + 'pg_catalog.english', + replace( + websearch_to_tsquery('pg_catalog.english', %(searchterm)s)::text || ':*', + '<->', '|' + ) + ) + {0} + ), + ''' + else: + sql = ''' + with + entities as ( + select *, + cast(regexp_replace(id, '[a-zA-Z]+', '') as integer) as true_id + from public.clinicalcode_historicalgenericentity + where id = ANY(%(entity_ids)s) + and history_id = ANY(%(history_ids)s) + {0} + ), + ''' + sql = sql.format(order_clause) + + sql = sql + ''' + children as ( + select entity.id as parent_id, + entity.history_id as parent_history_id, + concept.id as id, + concept.history_id as history_id, + concept.name as name, + codingsystem.id as coding_system, + codingsystem.name as coding_system_name, + 'C' as prefix, + 'concept' as type, + 'concept_information' as field + from entities as entity, + json_array_elements(entity.template_data::json->'concept_information') as concepts + join public.clinicalcode_historicalconcept as concept + on concept.id = cast(concepts->>'concept_id' as integer) + and concept.history_id = cast(concepts->>'concept_version_id' as integer) + join 
public.clinicalcode_codingsystem as codingsystem + on codingsystem.id = concept.coding_system_id + ) + + select + json_agg( + json_build_object( + 'id', entity.id, + 'history_id', entity.history_id, + 'name', entity.name, + 'author', entity.author, + 'children', child.children_data + ) + ) + from entities as entity + left join ( + select elem.parent_id, + elem.parent_history_id, + json_agg( + json_build_object( + 'id', elem.id, + 'history_id', elem.history_id, + 'name', elem.name, + 'prefix', elem.prefix, + 'type', elem.type, + 'field', elem.field, + 'coding_system', elem.coding_system, + 'coding_system_name', elem.coding_system_name + ) + ) as children_data + from children as elem + group by elem.parent_id, elem.parent_history_id + ) as child + on entity.id = child.parent_id + and entity.history_id = child.parent_history_id; + ''' -def get_template_entities(request, template_id, method='GET', force_term=True): + entity_ids = list(entities.values_list('id', flat=True)) + history_ids = list(entities.values_list('history_id', flat=True)) + cursor.execute( + sql, + params={ + 'entity_ids': entity_ids, + 'history_ids': history_ids, + 'searchterm': search, + } + ) + + (results, ) = cursor.fetchone() + + return results + +def try_search_template_descendants(entities, field_type, search=None, order_clause=None): + """ + Method to search and collect descendants associated with Phenotypes + from a HistoricalGenericEntity QuerySet + + [!] NOTE: + 1. `order_clause` is unsafe: + Do not allow unknown inputs, only allow those defined + within *entity_utils/constants.py* + + 2. 
This method ignores permissions: + It should only be called from a method that + has previously considered accessibility + + Args: + entities {QuerySet}: a HistoricalGenericEntity QuerySet + + field_type {str}: describes the `field_type` within a template `field`'s `validation` + + search {str | None}: an optional search parameter + + order_clause {str | None}: an optional order clause + + Returns: + Either (a) dict[] array containing the results; + or (b) a null value + + """ + search = search if isinstance(search, str) else None + order_clause = order_clause if isinstance(order_clause, str) else '' + + results = None + if field_type == 'concept': + results = try_search_child_concepts(entities, search=search, order_clause=order_clause) + + return results + +@gen_utils.measure_perf +def get_template_entities(request, template_id, method='GET', force_term=True, field_type='concept'): """ Method to get a Template's entities that: 1. Are accessible to the RequestContext's user 2. Match the query parameters - + + Args: + request {RequestContext}: the HTTP Request Context + template_id {int | None}: optional template_id + method {str}: the HTTP request method + force_term {boolean}: whether to ensure validity and cleanliness of query parameters + child_field {str}: the entity & template's field to consider when computing descendants + Returns: A page of the results as defined by the query param - Contains the entities and their related children - Contains the pagination details + """ + url_parameters = getattr(request, method, None) + if not isinstance(url_parameters, dict): + return None + template = model_utils.try_get_instance(Template, pk=template_id) if template is None: return None @@ -404,103 +687,83 @@ def get_template_entities(request, template_id, method='GET', force_term=True): template_fields = template_utils.get_layout_fields(template) if template_fields is None: return None - + + child_fields = 
template_utils.try_get_children_field_details(fields=template_fields) + has_children = isinstance(child_fields, list) and len(child_fields) > 0 + valid_relation = next((x for x in child_fields if x.get('type') == field_type), None) if has_children else None + if valid_relation is None: + return { + 'results': [ ], + 'details': { + 'page': 1, + 'total': 1, + 'max_results': 0, + 'start_index': 0, + 'end_index': 0, + 'has_previous': False, + 'has_next': False, + }, + } + entities = permission_utils.get_accessible_entities( request, status=[constants.APPROVAL_STATUS.ANY] ) - entities = entities.filter(template__id=template_id) - entities = GenericEntity.history.filter( - id__in=entities.values_list('id', flat=True), - history_id__in=entities.values_list('history_id', flat=True) - ) metadata_filters = [key for key, value in constants.metadata.items() if 'search' in value and 'filterable' in value.get('search')] template_filters = [ ] - + for key, value in template_fields.items(): if 'search' not in value or 'filterable' not in value.get('search'): continue template_filters.append(key) - + query = { } where = [ ] params = [ ] - for param, data in getattr(request, method).items(): + for param, data in url_parameters.items(): if param in metadata_filters: apply_param_to_query(query, where, params, constants.metadata, param, data, force_term=force_term) elif param in template_filters: if template_fields is None: continue apply_param_to_query(query, where, params, template_fields, param, data, is_dynamic=True, force_term=force_term) - - entities = entities.filter(Q(**query)) - entities = entities.extra(where=where, params=params) - + + entities = entities \ + .filter(Q(template__id=template_id) & Q(**query)) \ + .extra(where=where, params=params) + + parent_id = url_parameters.get('parent_id', None) + parent_id = parent_id if not gen_utils.is_empty_string(parent_id) else None + parent_history_id = gen_utils.parse_int(url_parameters.get('parent_history_id', None)) if 
parent_id is not None else None + if parent_id and parent_history_id: + entities = entities.exclude(id=parent_id, history_id=parent_history_id) + + search = gen_utils.try_get_param(request, 'search', None) search_order = gen_utils.try_get_param(request, 'order_by', '1', method) - should_order_search = search_order == '1' search_order = template_utils.try_get_content(constants.ORDER_BY, search_order) if search_order is None: search_order = constants.ORDER_BY['1'] - - search = gen_utils.try_get_param(request, 'search', None) - if not gen_utils.is_empty_string(search): - entity_ids = list(entities.values_list('id', flat=True)) - entities = entities.filter( - id__in=RawSQL( - """ - select id - from clinicalcode_historicalgenericentity - where id = ANY(%s) - and search_vector @@ to_tsquery('pg_catalog.english', replace(websearch_to_tsquery('pg_catalog.english', %s)::text || ':*', '<->', '|')) - """, - [entity_ids, search] - ) - ) \ - .annotate( - score=RawSQL( - """ts_rank_cd(search_vector, websearch_to_tsquery('pg_catalog.english', %s))""", - [search] - ) - ) - if should_order_search: - entities = entities.order_by('-score') + order_clause = 'order by true_id asc' + if search_order == constants.ORDER_BY.get('1') and not gen_utils.is_empty_string(search): + order_clause = 'order by score desc' + elif search_order != constants.ORDER_BY.get('1'): + order_clause = 'order by %s %s' % (search_order.get('property'), search_order.get('order')) - if search_order != constants.ORDER_BY['1']: - search_order = search_order.get('clause') - entities = entities.order_by(search_order) - else: - if gen_utils.is_empty_string(search): - entities = entities.all().extra( - select={'true_id': """CAST(REGEXP_REPLACE(id, '[a-zA-Z]+', '') AS INTEGER)"""} - ) \ - .order_by('true_id', 'id') - - page_obj = try_get_paginated_results(request, entities, page_size=10) - - results = [ ] - for obj in page_obj.object_list: - entity = { - 'id': obj.id, - 'name': obj.name, - 'history_id': obj.history_id, - 
'author': template_utils.get_entity_field(obj, 'author') or 'null' - } - - children = try_get_template_children(obj, default=[]) - if len(children) > 0: - entity.update({ 'children': children }) - results.append(entity) + results = try_search_template_descendants(entities, field_type=field_type, search=search, order_clause=order_clause) + results = results or [ ] + page_obj = try_get_paginated_results(request, results, page_size=10) + obj_list = page_obj.object_list return { - 'results': results, + 'results': obj_list, 'details': { 'page': page_obj.number, 'total': page_obj.paginator.num_pages, 'max_results': page_obj.paginator.count, 'start_index': page_obj.start_index() if len(results) > 0 else 0, - 'end_index': page_obj.end_index() - (len(page_obj.object_list) - len(results)), + 'end_index': page_obj.end_index() - (len(obj_list) - len(results)), 'has_previous': page_obj.has_previous(), 'has_next': page_obj.has_next(), }, @@ -552,7 +815,6 @@ def reorder_search_results(search_results, order=None, searchterm=''): return results -@gen_utils.measure_perf def get_renderable_entities(request, entity_types=None, method='GET', force_term=True): """ Method gets searchable, published entities and applies filters retrieved from the request param(s) @@ -653,9 +915,7 @@ def get_renderable_entities(request, entity_types=None, method='GET', force_term # Generate layouts for use in templates layouts = { } - count = 0 for template in templates: - count = count + 1 layouts[f'{template.id}/{template.template_version}'] = { 'id': template.id, 'version': template.template_version, diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/stats_utils.py b/CodeListLibrary_project/clinicalcode/entity_utils/stats_utils.py index 72f1c021a..070d7facf 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/stats_utils.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/stats_utils.py @@ -8,6 +8,16 @@ from ..models import GenericEntity, Template, Statistics, Brand, 
CodingSystem, DataSource, PublishedGenericEntity, Tag from . import template_utils, constants, model_utils, entity_db_utils, concept_utils +class MockStatsUser: + """ + Fake user for use within a RequestFactory + to simulate the request used for computation of statistics + """ + is_active = True + is_superuser = False + is_authenticated = True + is_superuser = True + def sort_by_count(a, b): """ Used to sort filter statistics in descending order @@ -274,13 +284,13 @@ def clear_statistics_history(): cursor.execute(sql) -def compute_homepage_stats(request, brand): +def compute_homepage_stats(request, brand, is_mock=False): stat = get_homepage_stats(request, brand) if Statistics.objects.all().filter(org__iexact=brand, type__iexact='landing-page').exists(): stats = Statistics.objects.get(org__iexact=brand, type__iexact='landing-page') stats.stat = stat - stats.updated_by = [None, request.user][request.user.is_authenticated] + stats.updated_by = [None, request.user][request.user.is_authenticated] if not is_mock else None stats.modified = datetime.datetime.now() stats.save() @@ -291,22 +301,27 @@ def compute_homepage_stats(request, brand): org=brand, type='landing-page', stat=stat, - created_by=[None, request.user][request.user.is_authenticated] + created_by=[None, request.user][request.user.is_authenticated] if not is_mock else None ) clear_statistics_history() return [stat, obj.id] -def save_homepage_stats(request, brand=None): +def save_homepage_stats(request, brand=None, is_mock=False): if brand is not None: - return compute_homepage_stats(request, brand) + return compute_homepage_stats(request, brand, is_mock) brands = Brand.objects.all() result = [ ] for brand in brands: - result.append(compute_homepage_stats(request, brand.name)) - result.append(compute_homepage_stats(request, 'ALL')) + if is_mock: + setattr(request, 'CURRENT_BRAND', brand) + result.append(compute_homepage_stats(request, brand.name, is_mock)) + + if is_mock: + setattr(request, 'CURRENT_BRAND', 
None) + result.append(compute_homepage_stats(request, 'ALL', is_mock)) return result @@ -317,7 +332,7 @@ def get_homepage_stats(request, brand=None): if brand is None: brand = request.CURRENT_BRAND if request.CURRENT_BRAND is not None and request.CURRENT_BRAND != '' else 'ALL' - + collection_ids = [ ] if brand == 'ALL': collection_ids = Tag.objects.filter(tag_type=2) diff --git a/CodeListLibrary_project/clinicalcode/entity_utils/template_utils.py b/CodeListLibrary_project/clinicalcode/entity_utils/template_utils.py index 9dea9b1be..54f3d1d2e 100644 --- a/CodeListLibrary_project/clinicalcode/entity_utils/template_utils.py +++ b/CodeListLibrary_project/clinicalcode/entity_utils/template_utils.py @@ -122,14 +122,14 @@ def get_ordered_definition(definition, clean_fields=False): for field in layout_order: content = try_get_content(fields, field) if clean_fields: - content.pop('order') + content.pop('order', None) ordered_fields[field] = content definition['fields'] = ordered_fields if clean_fields: - definition.pop('layout_order') + definition.pop('layout_order', None) return definition @@ -179,6 +179,89 @@ def get_entity_field(entity, field, default=None): return default +def try_get_children_field_names(template=None, fields=None, default=None): + """ + Attempts to get the names of fields containing children that + are associated with a template's definition + + Args: + template (Model