From 93b2d234ec06d13a07942581acb2cf4908c0660a Mon Sep 17 00:00:00 2001
From: Matthias Koenig
Date: Wed, 30 Sep 2020 21:19:10 +0200
Subject: [PATCH 01/29] latest release notes

---
 release-notes/{0.9.2.md => 0.9.3.md} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename release-notes/{0.9.2.md => 0.9.3.md} (96%)

diff --git a/release-notes/0.9.2.md b/release-notes/0.9.3.md
similarity index 96%
rename from release-notes/0.9.2.md
rename to release-notes/0.9.3.md
index b8183385..3af07648 100644
--- a/release-notes/0.9.2.md
+++ b/release-notes/0.9.3.md
@@ -1,4 +1,4 @@
-# Release notes for pkdb 0.9.2
+# Release notes for pkdb 0.9.3
 
 ## New features
 ### frontend

From d71b49431f84e7fb953729ddcfd52b5f00dbba7c Mon Sep 17 00:00:00 2001
From: Jan Grzegorzewski
Date: Thu, 1 Oct 2020 00:41:41 +0200
Subject: [PATCH 02/29] repaired timecourse download
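
The outputs sheet of the zip download is pivoted into a per-label
timecourse table. When a query matches no timecourse rows, pivoting the
empty frame fails and the whole download aborts; the fix writes an empty
CSV instead. A minimal standalone sketch of the guard (the frame and the
helper name are illustrative, not the full view code):

    import pandas as pd

    def timecourse_table(df: pd.DataFrame) -> pd.DataFrame:
        # Only pivot when timecourse rows exist; pivoting an empty
        # frame has nothing to aggregate.
        if len(df) != 0:
            return pd.pivot_table(data=df, index=["output_pk"], aggfunc=tuple)
        return pd.DataFrame([])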
---
 backend/pkdb_app/studies/views.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py
index d0e3391b..08cfe08b 100644
--- a/backend/pkdb_app/studies/views.py
+++ b/backend/pkdb_app/studies/views.py
@@ -749,8 +749,11 @@ def sorted_tuple(v):
 
                     if key=="outputs":
                         timecourse_df = df[df["output_type"] == Output.OutputTypes.Timecourse]
-                        timecourse_df = pd.pivot_table(data=timecourse_df,index=["output_pk"], aggfunc=sorted_tuple).apply(SubSet.to_list)
-                        timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc=tuple).apply(SubSet.to_list)
+                        if len(timecourse_df) !=0:
+                            timecourse_df = pd.pivot_table(data=timecourse_df,index=["output_pk"], aggfunc=sorted_tuple).apply(SubSet.to_list)
+                            timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc=tuple).apply(SubSet.to_list)
+                        else:
+                            timecourse_df = pd.DataFrame([])
                         timecourse_df.to_csv(string_buffer)
                         archive.writestr(f'timecourse.csv', string_buffer.getvalue())

From 0d4999dd8fdee2b8efb10cfd2ddf06aed4edcd16 Mon Sep 17 00:00:00 2001
From: Matthias Koenig
Date: Thu, 1 Oct 2020 01:59:31 +0200
Subject: [PATCH 03/29] pinning plotly js

---
 frontend/package.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/frontend/package.json b/frontend/package.json
index 43bdf54f..bac536bb 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -16,6 +16,7 @@
     "color-normalize": "1.5.0",
     "color-rgba": "2.1.1",
     "color-parse": "1.3.8",
+    "plotly": "1.55.2",
     "vega": "^5.16.1",
     "vega-embed": "^6.12.2",
     "vega-lite": "^4.16.7",

From 327f0967a04bb1f5a6f02bded52f9d13d02944f0 Mon Sep 17 00:00:00 2001
From: Matthias Koenig
Date: Thu, 1 Oct 2020 02:02:48 +0200
Subject: [PATCH 04/29] pinning plotly.js

---
 frontend/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/package.json b/frontend/package.json
index bac536bb..3f247d2d 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -16,7 +16,7 @@
     "color-normalize": "1.5.0",
     "color-rgba": "2.1.1",
     "color-parse": "1.3.8",
-    "plotly": "1.55.2",
+    "plotly.js": "1.55.2",
     "vega": "^5.16.1",
     "vega-embed": "^6.12.2",
     "vega-lite": "^4.16.7",

From de0d858db429c39e6144b8ae6e9993fd2d7e1a85 Mon Sep 17 00:00:00 2001
From: Jan Grzegorzewski
Date: Thu, 1 Oct 2020 16:01:19 +0200
Subject: [PATCH 05/29] repaired download
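
The flat serializers used by the download (studies, groups, individuals,
interventions, outputs) are converted from ModelSerializer to plain
Serializer classes with explicitly declared fields, so rows can be built
directly from Elasticsearch hits instead of model instances.
data_by_query_dict additionally gains a boost flag: boosted sheets read
raw hits via source(), the others go through the serializer. A reduced
sketch of the declared-field pattern (FlatRowSerializer and its field
list are illustrative only):

    from rest_framework import serializers

    class FlatRowSerializer(serializers.Serializer):
        # Plain Serializer: every field is declared, nothing is
        # introspected from a Django model, so dict-like Elasticsearch
        # hits can be serialized directly.
        study_sid = serializers.CharField()
        value = serializers.FloatField()
        unit = serializers.CharField()

        class Meta:
            # The download code reads Meta.fields to pick CSV columns.
            fields = ["study_sid", "value", "unit"]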
---
 backend/pkdb_app/behaviours.py                |  1 -
 backend/pkdb_app/interventions/documents.py   |  2 +-
 backend/pkdb_app/interventions/serializers.py | 56 +++++++++++--------
 backend/pkdb_app/outputs/serializers.py       | 41 ++++++++++++--
 backend/pkdb_app/serializers.py               |  1 -
 backend/pkdb_app/studies/serializers.py       | 48 ++++++++--------
 backend/pkdb_app/studies/views.py             | 54 +++++++++++------
 backend/pkdb_app/subjects/serializers.py      | 55 ++++++++++++++++--
 elastic-rebuild-index.sh                      |  2 +
 9 files changed, 180 insertions(+), 80 deletions(-)

diff --git a/backend/pkdb_app/behaviours.py b/backend/pkdb_app/behaviours.py
index 70b3c92b..87df4d51 100644
--- a/backend/pkdb_app/behaviours.py
+++ b/backend/pkdb_app/behaviours.py
@@ -54,7 +54,6 @@ def study_sid(self):
 def map_field(fields):
     return [f"{field}_map" for field in fields]
 
-VALUE_FIELDS_SAME_SCALE = ["value", "mean", "median", "min", "max"]
 VALUE_FIELDS_SAME_SCALE = ["value", "mean", "median", "min", "max"]
 VALUE_FIELDS_NO_UNIT = VALUE_FIELDS_SAME_SCALE + ["sd", "se", "cv"]
 VALUE_FIELDS = VALUE_FIELDS_NO_UNIT + ["unit"]

diff --git a/backend/pkdb_app/interventions/documents.py b/backend/pkdb_app/interventions/documents.py
index 047ea2fe..f8fbd117 100644
--- a/backend/pkdb_app/interventions/documents.py
+++ b/backend/pkdb_app/interventions/documents.py
@@ -10,7 +10,7 @@
 # ------------------------------------
 @registry.register_document
 class InterventionDocument(Document):
-    pk = fields.IntegerField()
+    pk = fields.IntegerField("pk")
     measurement_type = info_node("i_measurement_type")
     form = info_node("i_form")
     route = info_node("i_route")

diff --git a/backend/pkdb_app/interventions/serializers.py b/backend/pkdb_app/interventions/serializers.py
index 0d8fb6b5..accd1c2a 100644
--- a/backend/pkdb_app/interventions/serializers.py
+++ b/backend/pkdb_app/interventions/serializers.py
@@ -3,14 +3,13 @@
 """
 import itertools
 
-from django.apps import apps
 from rest_framework import serializers
 
 from pkdb_app import utils
 from pkdb_app.behaviours import VALUE_FIELDS_NO_UNIT, \
     MEASUREMENTTYPE_FIELDS, map_field, EX_MEASUREMENTTYPE_FIELDS
 from pkdb_app.info_nodes.models import InfoNode
-from pkdb_app.info_nodes.serializers import MeasurementTypeableSerializer, EXMeasurementTypeableSerializer
+from pkdb_app.info_nodes.serializers import MeasurementTypeableSerializer
 from pkdb_app.subjects.serializers import EXTERN_FILE_FIELDS
 from ..comments.serializers import DescriptionSerializer, CommentSerializer, DescriptionElasticSerializer, \
     CommentElasticSerializer
@@ -297,36 +296,49 @@ class Meta:
         fields = ["pk", "normed"] + INTERVENTION_FIELDS + ["study"] + MEASUREMENTTYPE_FIELDS
 
 
-class InterventionElasticSerializerAnalysis(serializers.ModelSerializer):
+class InterventionElasticSerializerAnalysis(serializers.Serializer):
+    study_sid = serializers.CharField()
+    study_name = serializers.CharField()
     intervention_pk = serializers.IntegerField(source="pk")
-    substance = serializers.CharField(source="substance_name", allow_null=True)
-    measurement_type = serializers.CharField(source="measurement_type_name",)
+    raw_pk = serializers.IntegerField()
+    normed = serializers.BooleanField()
+
+    name = serializers.CharField()
     route = serializers.CharField(source="route_name",)
-    application = serializers.CharField(source="application_name",)
     form = serializers.CharField(source="form_name",)
+    application = serializers.CharField(source="application_name",)
+    time = serializers.FloatField()
+    time_end = serializers.FloatField()
+    time_unit = serializers.CharField()
+    measurement_type = serializers.CharField(source="measurement_type_name",)
     choice = serializers.CharField(source="choice_name")
-    value = serializers.FloatField(allow_null=True)
-    mean = serializers.FloatField(allow_null=True)
-    median = serializers.FloatField(allow_null=True)
-    min = serializers.FloatField(allow_null=True)
-    max = serializers.FloatField(allow_null=True)
-    sd = serializers.FloatField(allow_null=True)
-    se = serializers.FloatField(allow_null=True)
-    cv = serializers.FloatField(allow_null=True)
+    substance = serializers.CharField(source="substance_name", )
+
+    value = serializers.FloatField()
+    mean = serializers.FloatField()
+    median = serializers.FloatField()
+    min = serializers.FloatField()
+    max = serializers.FloatField()
+    sd = serializers.FloatField()
+    se = serializers.FloatField()
+    cv = serializers.FloatField()
+    unit = serializers.CharField()
 
     class Meta:
-        model = Intervention
         fields = ["study_sid", "study_name", "intervention_pk", "raw_pk", "normed"] + INTERVENTION_FIELDS + MEASUREMENTTYPE_FIELDS
 
+    """
     def to_representation(self, instance):
-        rep = super().to_representation(instance)
-        for field in VALUE_FIELDS_NO_UNIT + ["time"]:
-            try:
-                rep[field] = '{:.2e}'.format(rep[field])
-            except (ValueError, TypeError):
-                pass
-        return rep
+        rep = super().to_representation(instance)
+        for field in VALUE_FIELDS_NO_UNIT + ["time"]:
+            try:
+                rep[field] = '{:.2e}'.format(rep[field])
+            except (ValueError, TypeError):
+                pass
+        return rep
+    """

diff --git a/backend/pkdb_app/outputs/serializers.py b/backend/pkdb_app/outputs/serializers.py
index 613efb80..ee8e61aa 100644
--- a/backend/pkdb_app/outputs/serializers.py
+++ b/backend/pkdb_app/outputs/serializers.py
@@ -14,13 +14,12 @@
 from .models import (
     Output,
     OutputSet,
-    OutputEx,
-    OutputIntervention)
+    OutputEx)
 from ..comments.serializers import DescriptionSerializer, CommentSerializer, DescriptionElasticSerializer, \
     CommentElasticSerializer
 from ..interventions.models import Intervention
 from ..serializers import (
-    ExSerializer, StudySmallElasticSerializer, SidNameLabelSerializer, SidNameSerializer)
+    ExSerializer, StudySmallElasticSerializer, SidNameLabelSerializer)
 from ..subjects.models import Group, DataFile, Individual
 from ..subjects.serializers import (
     EXTERN_FILE_FIELDS, GroupSmallElasticSerializer, IndividualSmallElasticSerializer)
@@ -315,12 +314,42 @@ def get_outputs(self, obj):
         return list_of_pk("outputs", obj)
 
 
-class OutputInterventionSerializer(serializers.ModelSerializer):
+class OutputInterventionSerializer(serializers.Serializer):
+    study_sid = serializers.CharField()
+    study_name = serializers.CharField()
+    output_pk = serializers.IntegerField()
+    intervention_pk = serializers.IntegerField()
+    group_pk = serializers.IntegerField()
+    individual_pk = serializers.IntegerField()
+    normed = serializers.BooleanField()
+    calculated = serializers.BooleanField()
+
+    tissue = serializers.CharField()
+    method = serializers.CharField()
+    label = serializers.CharField()
+    output_type = serializers.CharField()
+
+    time = serializers.FloatField()
+    time_unit = serializers.CharField()
+
+    measurement_type = serializers.CharField()
+    choice = serializers.CharField()
+    substance = serializers.CharField()
+
+    value = serializers.FloatField()
+    mean = serializers.FloatField()
+    median = serializers.FloatField()
+    min = serializers.FloatField()
+    max = serializers.FloatField()
+    sd = serializers.FloatField()
+    se = serializers.FloatField()
+    cv = serializers.FloatField()
+    unit = serializers.CharField()
+
     class Meta:
-        model = OutputIntervention
         fields = ["study_sid", "study_name", "output_pk", "intervention_pk",
                   "group_pk", "individual_pk", "normed",
                   "calculated"] + OUTPUT_FIELDS + MEASUREMENTTYPE_FIELDS
-        read_only_fields = fields
 
 
 class SmallOutputSerializer(serializers.ModelSerializer):

diff --git a/backend/pkdb_app/serializers.py b/backend/pkdb_app/serializers.py
index 358daee8..4a758a01 100644
--- a/backend/pkdb_app/serializers.py
+++ b/backend/pkdb_app/serializers.py
@@ -762,7 +762,6 @@ def validate_dict(dic):
             "detail": dic}
         )
 
-
 class StudySmallElasticSerializer(serializers.ModelSerializer):
     class Meta:
         model = Study

diff --git a/backend/pkdb_app/studies/serializers.py b/backend/pkdb_app/studies/serializers.py
index 88c176de..50f8d4b3 100644
--- a/backend/pkdb_app/studies/serializers.py
+++ b/backend/pkdb_app/studies/serializers.py
@@ -19,7 +19,8 @@
     DescriptionElasticSerializer
 from ..interventions.models import DataFile, InterventionSet
 from ..interventions.serializers import InterventionSetSerializer, InterventionSetElasticSmallSerializer
-from ..serializers import WrongKeyValidationSerializer, SidSerializer, StudySmallElasticSerializer, SidNameLabelSerializer
+from ..serializers import WrongKeyValidationSerializer, SidSerializer, StudySmallElasticSerializer, \
+    SidNameLabelSerializer
 from ..subjects.models import GroupSet, IndividualSet
 from ..subjects.serializers import GroupSetSerializer, IndividualSetSerializer, DataFileElasticSerializer, \
     GroupSetElasticSmallSerializer, IndividualSetElasticSmallSerializer
@@ -526,7 +527,7 @@ class StudyElasticSerializer(serializers.ModelSerializer):
 
     substances = SidNameLabelSerializer(many=True, )
 
-    files = serializers.SerializerMethodField()  # DataFileElasticSerializer(many=True, )
+    files = serializers.SerializerMethodField()
 
     comments = CommentElasticSerializer(many=True, )
     descriptions = DescriptionElasticSerializer(many=True, )
@@ -594,21 +595,39 @@ def get_files(self, obj):
         else:
             return []
 
-class StudyAnalysisSerializer(serializers.ModelSerializer):
+class StudyAnalysisSerializer(serializers.Serializer):
     sid = serializers.CharField()
     name = serializers.CharField()
     licence = serializers.CharField()
     access = serializers.CharField()
+    date = serializers.DateField()
+
+    creator = serializers.SerializerMethodField()
+    curators = serializers.SerializerMethodField()
     substances = serializers.SerializerMethodField()
+
     reference_pmid = serializers.SerializerMethodField()
     reference_title = serializers.SerializerMethodField()
-    creator = serializers.SerializerMethodField()
-    curators = serializers.SerializerMethodField()
+    reference_date = serializers.DateField()
 
-    class Meta:
-        model = Study
+    def get_substances(self, obj):
+        return [s["label"] for s in obj.substances]
+
+    def get_reference_pmid(self, obj):
+        return obj.reference["pmid"]
+
+    def get_reference_title(self, obj):
+        return obj.reference["title"]
+
+    def get_creator(self, obj):
+        return obj.creator["username"]
+
+    def get_curators(self, obj):
+        return [s["username"] for s in obj.curators]
+
+    class Meta:
         fields = [
             "sid",
             "name",
@@ -624,18 +643,3 @@ class Meta:
         ]
 
         read_only_fields = fields
-
-    def get_substances(self, obj):
-        return [s["label"] for s in obj.substances]
-
-    def get_reference_pmid(self, obj):
-        return obj.reference["pmid"]
-
-    def get_reference_title(self, obj):
-        return obj.reference["title"]
-
-    def get_creator(self, obj):
-        return obj.creator["username"]
-
-    def get_curators(self, obj):
-        return [s["username"] for s in obj.curators]
\ No newline at end of file

diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py
index 08cfe08b..f2d1e212 100644
--- a/backend/pkdb_app/studies/views.py
+++ b/backend/pkdb_app/studies/views.py
@@ -2,7 +2,7 @@
 import tempfile
 import uuid
 import zipfile
-from collections import namedtuple
+from collections import namedtuple, OrderedDict
 from datetime import datetime
 from io import StringIO
 from typing import Dict
@@ -569,11 +569,19 @@ def _pks(self, view_class: DocumentViewSet, query_dict: Dict, pk_field: str="pk"
         response = queryset.source([pk_field]).params(size=scan_size).scan()
         return [instance[pk_field] for instance in response]
 
-    def data_by_query_dict(self,query_dict, viewset, serializer):
+    def data_by_query_dict(self,query_dict, viewset, serializer, boost):
         view = viewset(request=self.request)
         queryset = view.filter_queryset(view.get_queryset())
-        queryset = queryset.filter("terms",**query_dict).source(serializer.Meta.fields)
-        return [hit.to_dict() for hit in queryset.params(size=10000).scan()]
+        if boost:
+            queryset = queryset.filter("terms", **query_dict).source(serializer.Meta.fields)
+            return [hit.to_dict() for hit in queryset.params(size=5000).scan()]
+
+        else:
+            queryset = queryset.filter("terms", **query_dict)
+
+            return serializer(queryset.params(size=5000).scan(), many=True).data
+
 
 class ResponseSerializer(serializers.Serializer):
@@ -717,15 +725,15 @@ def serialize_scatter(ids):
             scatter_subsets = SubSet.objects.filter(id__in=ids).prefetch_related('data_points')
             return [t.scatter_representation() for t in scatter_subsets]
 
-        Sheet = namedtuple("Sheet", ["sheet_name", "query_dict", "viewset", "serializer", "function"])
+        Sheet = namedtuple("Sheet", ["sheet_name", "query_dict", "viewset", "serializer", "function", "boost_performance",])
         table_content = {
-            "studies": Sheet("Studies", {"pk": pkdata.ids["studies"]}, ElasticStudyViewSet, StudyAnalysisSerializer, None),
-            "groups": Sheet("Groups", {"group_pk": pkdata.ids["groups"]}, GroupCharacteristicaViewSet, GroupCharacteristicaSerializer, None),
-            "individuals": Sheet("Individuals", {"individual_pk": pkdata.ids["individuals"]}, IndividualCharacteristicaViewSet,IndividualCharacteristicaSerializer, None),
-            "interventions": Sheet("Interventions", {"pk": pkdata.ids["interventions"]} ,ElasticInterventionAnalysisViewSet, InterventionElasticSerializerAnalysis, None),
-            "outputs": Sheet("Outputs", {"output_pk": pkdata.ids["outputs"]}, OutputInterventionViewSet, OutputInterventionSerializer, None),
+            "studies": Sheet("Studies", {"pk": pkdata.ids["studies"]}, ElasticStudyViewSet, StudyAnalysisSerializer, None, False),
+            "groups": Sheet("Groups", {"group_pk": pkdata.ids["groups"]}, GroupCharacteristicaViewSet, GroupCharacteristicaSerializer, None, True,),
+            "individuals": Sheet("Individuals", {"individual_pk": pkdata.ids["individuals"]}, IndividualCharacteristicaViewSet,IndividualCharacteristicaSerializer, None, True),
+            "interventions": Sheet("Interventions", {"pk": pkdata.ids["interventions"]} ,ElasticInterventionAnalysisViewSet, InterventionElasticSerializerAnalysis, None, False),
+            "outputs": Sheet("Outputs", {"output_pk": pkdata.ids["outputs"]}, OutputInterventionViewSet, OutputInterventionSerializer, None, True),
             #"timecourses": Sheet("Timecourses", {"subset_pk": pkdata.ids["timecourses"]}, None, None, serialize_timecourses),
-            "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatter),
+            "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatter, None),
         }
@@ -739,15 +747,20 @@ def serialize_scatter(ids):
                 string_buffer = StringIO()
                 if sheet.function:
                     df = pd.DataFrame(sheet.function(sheet.query_dict["subset_pk"]))
+                    df.to_csv(string_buffer)
+                    archive.writestr(f'{key}.csv', string_buffer.getvalue())
+                    download_times[key] = time.time() - download_time_start
 
                 else:
-                    data = pkdata.data_by_query_dict(sheet.query_dict,sheet.viewset,sheet.serializer)
-                    df = pd.DataFrame(data)
-                    def sorted_tuple(v):
-                        return sorted(tuple(v))
-
-                    if key=="outputs":
-
+                    data = pkdata.data_by_query_dict(sheet.query_dict,sheet.viewset,sheet.serializer, sheet.boost_performance)
+                    df = pd.DataFrame(data)[sheet.serializer.Meta.fields]
+                    df.to_csv(string_buffer)
+                    archive.writestr(f'{key}.csv', string_buffer.getvalue())
+                    download_times[key] = time.time() - download_time_start
+                    if key == "outputs":
+                        download_time_start_timecourse = time.time()
+                        def sorted_tuple(v):
+                            return sorted(tuple(v))
                         timecourse_df = df[df["output_type"] == Output.OutputTypes.Timecourse]
                         if len(timecourse_df) !=0:
                             timecourse_df = pd.pivot_table(data=timecourse_df,index=["output_pk"], aggfunc=sorted_tuple).apply(SubSet.to_list)
                             timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc=tuple).apply(SubSet.to_list)
@@ -756,10 +769,9 @@ def sorted_tuple(v):
                             timecourse_df = pd.DataFrame([])
                         timecourse_df.to_csv(string_buffer)
                         archive.writestr(f'timecourse.csv', string_buffer.getvalue())
+                        download_times["timecourse"] = time.time()-download_time_start_timecourse
+
 
-                df.to_csv(string_buffer)
-                archive.writestr(f'{key}.csv', string_buffer.getvalue())
-                download_times[key] = time.time()-download_time_start
 
             archive.write('download_extra/README.md', 'README.md')
             archive.write('download_extra/TERMS_OF_USE.md', 'TERMS_OF_USE.md')

diff --git a/backend/pkdb_app/subjects/serializers.py b/backend/pkdb_app/subjects/serializers.py
index 96d2d9ed..657bb3db 100644
--- a/backend/pkdb_app/subjects/serializers.py
+++ b/backend/pkdb_app/subjects/serializers.py
@@ -15,8 +15,7 @@
     DataFile,
     Individual,
     CharacteristicaEx,
-    GroupEx,
-    GroupCharacteristica, IndividualCharacteristica)
+    GroupEx )
 from ..comments.serializers import DescriptionSerializer, CommentSerializer, DescriptionElasticSerializer, \
     CommentElasticSerializer
 from ..serializers import WrongKeyValidationSerializer, ExSerializer, ReadSerializer
@@ -728,15 +727,59 @@ class Meta:
     )
 
 
-class GroupCharacteristicaSerializer(serializers.ModelSerializer):
+class GroupCharacteristicaSerializer(serializers.Serializer):
+    study_sid = serializers.CharField()
+    study_name = serializers.CharField()
+    group_pk = serializers.IntegerField()
+    group_name = serializers.CharField()
+    group_count = serializers.IntegerField()
+    group_parent_pk = serializers.IntegerField()
+    characteristica_pk = serializers.IntegerField()
+    count = serializers.IntegerField()
+
+    measurement_type = serializers.CharField()
+    choice = serializers.CharField()
+    substance = serializers.CharField()
+
+    value = serializers.FloatField()
+    mean = serializers.FloatField()
+    median = serializers.FloatField()
+    min = serializers.FloatField()
+    max = serializers.FloatField()
+    sd = serializers.FloatField()
+    se = serializers.FloatField()
+    cv = serializers.FloatField()
+    unit = serializers.CharField()
+
     class Meta:
-        model = GroupCharacteristica
         fields = ['study_sid', 'study_name',
                   'group_pk', 'group_name', 'group_count', 'group_parent_pk',
                   'characteristica_pk', 'count'] + MEASUREMENTTYPE_FIELDS
 
 
-class IndividualCharacteristicaSerializer(serializers.ModelSerializer):
+class IndividualCharacteristicaSerializer(serializers.Serializer):
+
+    study_sid = serializers.CharField()
+    study_name = serializers.CharField()
+    individual_pk = serializers.IntegerField()
+    individual_name = serializers.CharField()
+    individual_group_pk = serializers.IntegerField()
+    characteristica_pk = serializers.IntegerField()
+    count = serializers.IntegerField()
+
+    measurement_type = serializers.CharField()
+    choice = serializers.CharField()
+    substance = serializers.CharField()
+
+    value = serializers.FloatField()
+    mean = serializers.FloatField()
+    median = serializers.FloatField()
+    min = serializers.FloatField()
+    max = serializers.FloatField()
+    sd = serializers.FloatField()
+    se = serializers.FloatField()
+    cv = serializers.FloatField()
+    unit = serializers.CharField()
+
     class Meta:
-        model = IndividualCharacteristica
         fields = ['study_sid', 'study_name',
                   'individual_pk', 'individual_name', 'individual_group_pk',
                   'characteristica_pk', 'count'] + MEASUREMENTTYPE_FIELDS

diff --git a/elastic-rebuild-index.sh b/elastic-rebuild-index.sh
index 25257f50..0d41e2f9 100755
--- a/elastic-rebuild-index.sh
+++ b/elastic-rebuild-index.sh
@@ -4,3 +4,5 @@
 # -----------------------------------------------------------------------------
 : "${PKDB_DOCKER_COMPOSE_YAML:?The 'PKDB_*' environment variables must be exported.}"
 docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py search_index --rebuild -f
+# rebuild single
+# docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py search_index --rebuild -f --models [e.g. interventions]
\ No newline at end of file

From 10cb8c717c74be3b438daa2f048b75f0f15b3d80 Mon Sep 17 00:00:00 2001
From: Jan Grzegorzewski
Date: Thu, 1 Oct 2020 17:51:01 +0200
Subject: [PATCH 06/29] working version
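
Timecourse and scatter representations of a subset become cached
properties instead of methods, so repeated access (pk calculation,
download sheets) computes the expensive merge only once per instance;
callers change from subset.timecourse() to subset.timecourse.
none_tuple additionally guards against non-iterable input. A minimal
sketch of the caching pattern (ExampleSubset is hypothetical):

    from django.utils.functional import cached_property

    class ExampleSubset:
        @cached_property
        def timecourse(self):
            # Evaluated on first attribute access, then memoized in the
            # instance __dict__ until the object is discarded.
            print("merging data points ...")
            return {"time": (0, 1, 2), "value": (1.0, 2.0, 4.0)}

    s = ExampleSubset()
    s.timecourse  # computes and caches
    s.timecourse  # returns the cached dict, no recomputation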
---
 backend/pkdb_app/data/documents.py         | 38 +++++++++++++++-
 backend/pkdb_app/data/models.py            | 16 ++++---
 backend/pkdb_app/data/serializers.py       | 51 +++++++++++++++++++++-
 backend/pkdb_app/outputs/pk_calculation.py |  2 +-
 backend/pkdb_app/studies/views.py          | 44 ++++++++++++-----
 5 files changed, 131 insertions(+), 20 deletions(-)

diff --git a/backend/pkdb_app/data/documents.py b/backend/pkdb_app/data/documents.py
index bf980045..a7dfa044 100644
--- a/backend/pkdb_app/data/documents.py
+++ b/backend/pkdb_app/data/documents.py
@@ -1,6 +1,6 @@
 from django_elasticsearch_dsl import Document, fields, ObjectField
 from django_elasticsearch_dsl.registries import registry
-from pkdb_app.data.models import Dimension, SubSet
+from pkdb_app.data.models import Dimension, SubSet, Data
 
 from ..documents import string_field, elastic_settings, info_node, study_field
@@ -120,4 +120,38 @@ class Index:
         settings = elastic_settings
         settings['number_of_shards'] = 5
         settings['number_of_replicas'] = 1
-        settings['max_result_window'] = 100000
\ No newline at end of file
+        settings['max_result_window'] = 100000
+'''
+@registry.register_document
+class TimeCourseDocument(Document):
+    study_sid = string_field('study_sid')
+    study_name = string_field('study_name')
+    # for permissions
+    access = string_field('access')
+    allowed_users = fields.ObjectField(
+        attr="allowed_users",
+        properties={
+            'username': string_field("username")
+        },
+        multi=True
+    )
+
+    def get_queryset(self):
+        """Not mandatory but to improve performance we can select related in one sql request"""
+        return super(TimeCourseDocument, self).get_queryset().filter(data__data_type=Data.DataTypes.Timecourse)  # .prefetch_related("interventions").
+
+    class Django:
+        model = SubSet
+        # Ignore auto updating of Elasticsearch when a model is saved/deleted
+        ignore_signals = True
+        # Don't perform an index refresh after every update
+        auto_refresh = False
+
+    class Index:
+        name = 'subset'
+        settings = elastic_settings
+        settings['number_of_shards'] = 5
+        settings['number_of_replicas'] = 1
+        settings['max_result_window'] = 100000
+'''

diff --git a/backend/pkdb_app/data/models.py b/backend/pkdb_app/data/models.py
index 837003e6..f9890df1 100644
--- a/backend/pkdb_app/data/models.py
+++ b/backend/pkdb_app/data/models.py
@@ -1,6 +1,9 @@
 import itertools
+from collections import Iterable
+
 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
 from django.db import models
+from django.utils.functional import cached_property
 from pkdb_app.behaviours import Accessible
 from pkdb_app.interventions.models import Intervention
 from pkdb_app.utils import CHAR_MAX_LENGTH
@@ -114,7 +117,7 @@ def keys_timecourse_representation(self):
             "time": 'outputs__time',
             'time_unit': 'outputs__time_unit',
             "measurement_type": "outputs__measurement_type__info_node__sid",
-            "measurement__label": "outputs__measurement_type__info_node__label",
+            "measurement_type__label": "outputs__measurement_type__info_node__label",
             "choice": "outputs__choice__info_node__sid",
             "choice_label": "outputs__choice__info_node__label",
             "substance": "outputs__substance__info_node__sid",
@@ -144,10 +147,10 @@ def _timecourse_extra(self):
 
     @staticmethod
     def none_tuple(values):
-        if all(pd.isna(v) for v in values):
-            return (None,)
-        else:
-            return tuple(values)
+        if isinstance(values, Iterable):
+            if all(pd.isna(v) for v in values):
+                return (None,)
+            return tuple(values)
 
     @staticmethod
     def to_list(tdf):
@@ -213,6 +216,7 @@ def validate_timecourse(self, timecourse):
                     raise ValueError(f"Subset used for timecourse is not unique on '{key}'. Values are '{name}'. "
                                      f"Check uniqueness of labels for timecourses.")
 
+    @cached_property
     def timecourse(self):
         """ FIXME: Documentation """
         tc = self.merge_values(
@@ -231,6 +235,7 @@ def reformat_timecourse(self, timecourse, mapping):
                 if isinstance(timecourse[new_key], int):
                     timecourse[new_key] = (timecourse[new_key],)
 
+    @cached_property
     def timecourse_representation(self):
         """ FIXME: Documentation """
         timecourse = self.merge_values(
@@ -245,6 +250,7 @@ def keys_scatter_representation(self):
             "data_point": "pk"
         }
 
+    @cached_property
     def scatter_representation(self):
         scatter_x = self.merge_values(self.data_points.filter(dimensions__dimension=0).values(*self.keys_scatter_representation().values()), sort_values=None)
         self.reformat_timecourse(scatter_x, self.keys_scatter_representation())

diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py
index 8f27e5df..523d4e74 100644
--- a/backend/pkdb_app/data/serializers.py
+++ b/backend/pkdb_app/data/serializers.py
@@ -1,11 +1,12 @@
 import traceback
 
+from pkdb_app.behaviours import MEASUREMENTTYPE_FIELDS
 from pkdb_app.comments.serializers import DescriptionSerializer, CommentSerializer, CommentElasticSerializer, \
     DescriptionElasticSerializer
 from pkdb_app.data.models import DataSet, Data, SubSet, Dimension, DataPoint
 from pkdb_app.outputs.models import Output
 from pkdb_app.outputs.pk_calculation import pkoutputs_from_timecourse
-from pkdb_app.outputs.serializers import OUTPUT_FOREIGN_KEYS
+from pkdb_app.outputs.serializers import OUTPUT_FOREIGN_KEYS, OUTPUT_FIELDS
 from pkdb_app.serializers import WrongKeyValidationSerializer, ExSerializer, StudySmallElasticSerializer
 from pkdb_app.subjects.models import DataFile
 from pkdb_app.utils import _create, create_multiple_bulk, create_multiple_bulk_normalized, list_of_pk
@@ -14,6 +15,8 @@
 import pandas as pd
 import numpy as np
 
+from functools import lru_cache
+
 
 class DimensionSerializer(WrongKeyValidationSerializer):
     output = serializers.CharField(write_only=True, allow_null=False, allow_blank=False)
@@ -406,3 +409,51 @@ class Meta:
                   "output_pk", "dimension"]
 
         read_only_fields = fields
+
+
+class TimecourseSerializer(serializers.Serializer):
+    study_sid = serializers.CharField()
+    study_name = serializers.CharField()
+    output_pk = serializers.ListField(serializers.IntegerField())
+    subset_pk = serializers.IntegerField()
+    subset_name = serializers.CharField()
+    interventions = serializers.ListField(serializers.IntegerField())
+    group_pk = serializers.IntegerField()
+    individual_pk = serializers.IntegerField()
+    normed = serializers.BooleanField()
+    calculated = serializers.BooleanField()
+
+    tissue = serializers.CharField()
+    tissue_label = serializers.CharField()
+
+    method = serializers.CharField()
+    method_label = serializers.CharField()
+
+    label = serializers.CharField()
+    output_type = serializers.CharField()
+
+    time = serializers.FloatField()
+    time_unit = serializers.CharField()
+
+    measurement_type = serializers.CharField()
+    measurement_type__label = serializers.CharField()
+    choice = serializers.CharField()
+    choice_label = serializers.CharField()
+    substance = serializers.CharField()
+    substance_label = serializers.CharField()
+
+    value = serializers.ListField(serializers.IntegerField())
+    mean = serializers.ListField(serializers.IntegerField())
+    median = serializers.ListField(serializers.IntegerField())
+    min = serializers.ListField(serializers.IntegerField())
+    max = serializers.ListField(serializers.IntegerField())
+    sd = serializers.ListField(serializers.IntegerField())
+    se = serializers.ListField(serializers.IntegerField())
+    cv = serializers.ListField(serializers.IntegerField())
+    unit = serializers.CharField()
+
+    class Meta:
+        fields = ["study_sid", "study_name", "output_pk", "intervention_pk", "group_pk", "individual_pk", "normed",
+                  "calculated"] + OUTPUT_FIELDS + MEASUREMENTTYPE_FIELDS
\ No newline at end of file

diff --git a/backend/pkdb_app/outputs/pk_calculation.py b/backend/pkdb_app/outputs/pk_calculation.py
index aea19396..f52109ba 100644
--- a/backend/pkdb_app/outputs/pk_calculation.py
+++ b/backend/pkdb_app/outputs/pk_calculation.py
@@ -31,7 +31,7 @@ def pkoutputs_from_timecourse(subset:Subset) -> List[Dict]:
     """
     outputs = []
     dosing = subset.get_single_dosing()
-    timecourse = subset.timecourse()
+    timecourse = subset.timecourse
     # dosing information must exist
     if not dosing:
         return outputs

diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py
index f2d1e212..803013f5 100644
--- a/backend/pkdb_app/studies/views.py
+++ b/backend/pkdb_app/studies/views.py
@@ -2,7 +2,7 @@
 import tempfile
 import uuid
 import zipfile
-from collections import namedtuple, OrderedDict
+from collections import namedtuple
 from datetime import datetime
 from io import StringIO
 from typing import Dict
@@ -10,6 +10,7 @@
 import pandas as pd
 from django.db import connection
 from django.test.client import RequestFactory
+import numpy as np
 
 import django_filters.rest_framework
 from django.core.exceptions import ObjectDoesNotExist
@@ -721,9 +722,13 @@ def get(self, request, *args, **kw):
 
-        def serialize_scatter(ids):
-            scatter_subsets = SubSet.objects.filter(id__in=ids).prefetch_related('data_points')
-            return [t.scatter_representation() for t in scatter_subsets]
+        def serialize_scatters(ids):
+            scatter_subsets = SubSet.objects.filter(id__in=ids)
+            return [t.scatter_representation for t in scatter_subsets]
+
+        def serialize_timecourses(ids):
+            scatter_subsets = SubSet.objects.filter(id__in=ids)
+            return [t.timecourse_representation for t in scatter_subsets]
 
         Sheet = namedtuple("Sheet", ["sheet_name", "query_dict", "viewset", "serializer", "function", "boost_performance",])
         table_content = {
@@ -732,8 +737,8 @@ def serialize_scatter(ids):
             "individuals": Sheet("Individuals", {"individual_pk": pkdata.ids["individuals"]}, IndividualCharacteristicaViewSet,IndividualCharacteristicaSerializer, None, True),
             "interventions": Sheet("Interventions", {"pk": pkdata.ids["interventions"]} ,ElasticInterventionAnalysisViewSet, InterventionElasticSerializerAnalysis, None, False),
             "outputs": Sheet("Outputs", {"output_pk": pkdata.ids["outputs"]}, OutputInterventionViewSet, OutputInterventionSerializer, None, True),
-            #"timecourses": Sheet("Timecourses", {"subset_pk": pkdata.ids["timecourses"]}, None, None, serialize_timecourses),
-            "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatter, None),
+            "timecourses": Sheet("Timecourses", {"subset_pk": pkdata.ids["timecourses"]}, None, None, serialize_timecourses, None),
+            "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatters, None),
         }
@@ -752,24 +757,41 @@ def serialize_scatter(ids):
                 else:
-                    data = pkdata.data_by_query_dict(sheet.query_dict,sheet.viewset,sheet.serializer, sheet.boost_performance)
-                    df = pd.DataFrame(data)[sheet.serializer.Meta.fields]
+                    df = pd.DataFrame(pkdata.data_by_query_dict(sheet.query_dict,sheet.viewset,sheet.serializer, sheet.boost_performance))
+                    if len(df) > 0:
+                        df = df[sheet.serializer.Meta.fields]
                     df.to_csv(string_buffer)
                     archive.writestr(f'{key}.csv', string_buffer.getvalue())
                     download_times[key] = time.time() - download_time_start
+                    """
                     if key == "outputs":
+                        string_buffer = StringIO()
                         download_time_start_timecourse = time.time()
                         def sorted_tuple(v):
                             return sorted(tuple(v))
                         timecourse_df = df[df["output_type"] == Output.OutputTypes.Timecourse]
+
+                        def unique_or_sorted_list(v):
+                            values = v.unique()
+                            if len(values) == 1:
+                                return values[0]
+                            return tuple(values)
+
                         if len(timecourse_df) !=0:
-                            timecourse_df = pd.pivot_table(data=timecourse_df,index=["output_pk"], aggfunc=sorted_tuple).apply(SubSet.to_list)
-                            timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc=tuple).apply(SubSet.to_list)
+                            #timecourse_df = pd.pivot_table(data=timecourse_df,index=["output_pk"], aggfunc=sorted_tuple, dropna=False).apply(SubSet.to_list)
+                            #timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc=tuple, dropna=False).apply(SubSet.to_list)
+                            timecourse_df = pd.pivot_table(data=timecourse_df, index=["output_pk"],aggfunc=unique_or_sorted_list,fill_value=np.NAN)#.reset_index()
+                            timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc= unique_or_sorted_list, fill_value=np.NAN)#.reset_index()
+                            print(timecourse_df.columns)
+
+                            #timecourse_df = timecourse_df[table_content["outputs"].serializer.Meta.fields]
                         else:
                             timecourse_df = pd.DataFrame([])
                         timecourse_df.to_csv(string_buffer)
-                        archive.writestr(f'timecourse.csv', string_buffer.getvalue())
+                        archive.writestr('timecourse.csv', string_buffer.getvalue())
                         download_times["timecourse"] = time.time()-download_time_start_timecourse
+                    """
+
 
             archive.write('download_extra/README.md', 'README.md')

From 2b2cc3ebba1fc769d82ad18a7e818c3674c2527a Mon Sep 17 00:00:00 2001
From: Jan Grzegorzewski
Date: Thu, 1 Oct 2020 18:39:40 +0200
Subject: [PATCH 07/29] closes #660
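
Closes #660 (JSON button for groups and individuals, see release notes
0.9.4). On the backend the timecourse helpers move from SubSet into an
abstract Timecourseable base model so SubSet stays a thin combination of
mixins. Roughly (a sketch assuming a configured Django app; names and
fields shortened):

    from django.db import models

    class Timecourseable(models.Model):
        class Meta:
            # Abstract base: contributes methods, creates no table.
            abstract = True

        # timecourse / representation helpers live here ...

    class SubSet(Timecourseable):
        name = models.CharField(max_length=100)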
---
 backend/pkdb_app/data/documents.py            |   7 +-
 backend/pkdb_app/data/models.py               | 128 +++++++++---------
 .../src/components/detail/SubjectDetail.vue  |   2 +-
 3 files changed, 74 insertions(+), 63 deletions(-)

diff --git a/backend/pkdb_app/data/documents.py b/backend/pkdb_app/data/documents.py
index a7dfa044..77a312d0 100644
--- a/backend/pkdb_app/data/documents.py
+++ b/backend/pkdb_app/data/documents.py
@@ -121,11 +121,15 @@ class Index:
         settings['number_of_shards'] = 5
         settings['number_of_replicas'] = 1
         settings['max_result_window'] = 100000
+
+
 '''
 @registry.register_document
 class TimeCourseDocument(Document):
     study_sid = string_field('study_sid')
     study_name = string_field('study_name')
+    outputs_pk = fields.ListField('timecourse')
+
     # for permissions
     access = string_field('access')
     allowed_users = fields.ObjectField(
@@ -154,4 +158,5 @@ class Index:
         settings['number_of_shards'] = 5
         settings['number_of_replicas'] = 1
         settings['max_result_window'] = 100000
-'''
+
+'''
\ No newline at end of file

diff --git a/backend/pkdb_app/data/models.py b/backend/pkdb_app/data/models.py
index f9890df1..2d62a74d 100644
--- a/backend/pkdb_app/data/models.py
+++ b/backend/pkdb_app/data/models.py
@@ -37,41 +37,37 @@ class DataTypes(models.TextChoices):
     image = models.ForeignKey('subjects.DataFile', related_name="data", on_delete=models.CASCADE, null=True)
     dataset = models.ForeignKey(DataSet, related_name="data", on_delete=models.CASCADE, null=True)
 
+class Timecourseable(models.Model):
+    class Meta:
+        abstract = True
 
-class SubSet(Accessible):
-    name = models.CharField(max_length=CHAR_MAX_LENGTH)
-    data = models.ForeignKey(Data, related_name="subsets", on_delete=models.CASCADE)
-    study = models.ForeignKey('studies.Study', on_delete=models.CASCADE, related_name="subsets")
-
-    def get_single_dosing(self) -> Intervention:
-        """Returns a single intervention of type dosing if existing.
-        If multiple dosing interventions exist, no dosing is returned!.
-        """
-        try:
-            dosing_measurement_type = Intervention.objects.filter(id__in=self.interventions).get(
-                normed=True, measurement_type__info_node__name="dosing"
-            )
-            return dosing_measurement_type
-
-        except (ObjectDoesNotExist, MultipleObjectsReturned):
-            return None
-
-    @property
-    def array(self):
-        [point.values_list("output") for point in self.data_points]
-        return self.data.data_type
+    @cached_property
+    def timecourse(self):
+        """ FIXME: Documentation """
 
-    @property
-    def data_type(self):
-        return self.data.data_type
+        tc = self.merge_values(
+            self.data_points.prefetch_related('outputs').values(*self._timecourse_extra().values()),
+            sort_values=["outputs__interventions__pk", "outputs__time"]
+        )
+        self.reformat_timecourse(tc, self._timecourse_extra())
+        self.validate_timecourse(tc)
+        return tc
 
-    @property
-    def outputs(self):
-        return self.data_points.values_list('outputs', flat=True)
+    def reformat_timecourse(self, timecourse, mapping):
+        """ FIXME: Documentation & type hinting """
+        for new_key, old_key in mapping.items():
+            timecourse[new_key] = timecourse.pop(old_key)
+            if new_key == "interventions":
+                if isinstance(timecourse[new_key], int):
+                    timecourse[new_key] = (timecourse[new_key],)
 
-    @property
-    def interventions(self):
-        return self.data_points.values_list('outputs__interventions', flat=True)
+    @cached_property
+    def timecourse_representation(self):
+        """ FIXME: Documentation """
+        timecourse = self.merge_values(
+            self.data_points.values(*self.keys_timecourse_representation().values()), )
+        self.reformat_timecourse(timecourse, self.keys_timecourse_representation())
+        return timecourse
 
     def timecourse_extra_no_intervention(self):
         return {
@@ -96,9 +92,10 @@ def timecourse_extra_no_intervention(self):
             'time_unit': 'outputs__time_unit',
             'unit': 'outputs__unit',
         }
+
     def keys_timecourse_representation(self):
         return {
-            "study_sid":"outputs__study__sid",
+            "study_sid": "outputs__study__sid",
             "study_name": "outputs__study__name",
             "outputs_pk": "outputs__pk",
             "subset_pk": "subset_id",
@@ -116,9 +113,9 @@ def keys_timecourse_representation(self):
             "output_type": 'outputs__output_type',
             "time": 'outputs__time',
             'time_unit': 'outputs__time_unit',
-            "measurement_type":  "outputs__measurement_type__info_node__sid",
+            "measurement_type": "outputs__measurement_type__info_node__sid",
             "measurement_type__label": "outputs__measurement_type__info_node__label",
-            "choice":  "outputs__choice__info_node__sid",
+            "choice": "outputs__choice__info_node__sid",
             "choice_label": "outputs__choice__info_node__label",
             "substance": "outputs__substance__info_node__sid",
             "substance_label": "outputs__substance__info_node__label",
@@ -169,10 +166,11 @@ def _tuple_or_value(values):
         return tuple(values)
 
     @staticmethod
-    def merge_values(values=None ,df=None, groupby=("outputs__pk",), sort_values=["outputs__interventions__pk","outputs__time"]):
+    def merge_values(values=None, df=None, groupby=("outputs__pk",),
+                     sort_values=["outputs__interventions__pk", "outputs__time"]):
 
         if values:
-            df =pd.DataFrame(values)
+            df = pd.DataFrame(values)
         if sort_values:
             df = df.sort_values(sort_values)
         merged_dict = df.groupby(list(groupby), as_index=False).apply(SubSet.to_list).to_dict("list")
 
         for key, values in merged_dict.items():
             if key not in ['outputs__time', 'outputs__value', 'outputs__mean',
                            'outputs__median', 'outputs__cv', 'outputs__sd', 'outputs__se']:
-
                 merged_dict[key] = SubSet.tuple_or_value(values)
 
             if all(v is None for v in values):
@@ -216,32 +213,41 @@ def validate_timecourse(self, timecourse):
                     raise ValueError(f"Subset used for timecourse is not unique on '{key}'. Values are '{name}'. "
                                      f"Check uniqueness of labels for timecourses.")
 
-    @cached_property
-    def timecourse(self):
-        """ FIXME: Documentation """
-        tc = self.merge_values(
-            self.data_points.prefetch_related('outputs').values(*self._timecourse_extra().values()),
-            sort_values=["outputs__interventions__pk", "outputs__time"]
-        )
-        self.reformat_timecourse(tc, self._timecourse_extra())
-        self.validate_timecourse(tc)
-        return tc
 
-    def reformat_timecourse(self, timecourse, mapping):
-        """ FIXME: Documentation & type hinting """
-        for new_key, old_key in mapping.items():
-            timecourse[new_key] = timecourse.pop(old_key)
-            if new_key == "interventions":
-                if isinstance(timecourse[new_key], int):
-                    timecourse[new_key] = (timecourse[new_key],)
+class SubSet(Accessible, Timecourseable):
+    name = models.CharField(max_length=CHAR_MAX_LENGTH)
+    data = models.ForeignKey(Data, related_name="subsets", on_delete=models.CASCADE)
+    study = models.ForeignKey('studies.Study', on_delete=models.CASCADE, related_name="subsets")
 
-    @cached_property
-    def timecourse_representation(self):
-        """ FIXME: Documentation """
-        timecourse = self.merge_values(
-            self.data_points.values(*self.keys_timecourse_representation().values()),)
-        self.reformat_timecourse(timecourse, self.keys_timecourse_representation())
-        return timecourse
+    def get_single_dosing(self) -> Intervention:
+        """Returns a single intervention of type dosing if existing.
+        If multiple dosing interventions exist, no dosing is returned!.
+        """
+        try:
+            dosing_measurement_type = Intervention.objects.filter(id__in=self.interventions).get(
+                normed=True, measurement_type__info_node__name="dosing"
+            )
+            return dosing_measurement_type
+
+        except (ObjectDoesNotExist, MultipleObjectsReturned):
+            return None
+
+    @property
+    def array(self):
+        [point.values_list("output") for point in self.data_points]
+        return self.data.data_type
+
+    @property
+    def data_type(self):
+        return self.data.data_type
+
+    @property
+    def outputs(self):
+        return self.data_points.values_list('outputs', flat=True)
+
+    @property
+    def interventions(self):
+        return self.data_points.values_list('outputs__interventions', flat=True)
 
     def keys_scatter_representation(self):
         """ FIXME: Documentation """

diff --git a/frontend/src/components/detail/SubjectDetail.vue b/frontend/src/components/detail/SubjectDetail.vue
index ff3f5958..1f379540 100644
--- a/frontend/src/components/detail/SubjectDetail.vue
+++ b/frontend/src/components/detail/SubjectDetail.vue
@@ -4,7 +4,7 @@
       {{ faIcon(subject_type) }}
       {{ subject.name }}
-
+

From d5fe9104b07f906f3ba2705f2ae92b2f734f93 Mon Sep 17 00:00:00 2001
From: Matthias Koenig
Date: Thu, 1 Oct 2020 21:06:31 +0200
Subject: [PATCH 08/29] Fix #665, django update
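
Security update: Django is bumped from 3.1.1 to 3.1.2 (#665), and the
release notes for pkdb 0.9.4 are started, also recording the JSON
button fix for groups and individuals (#660).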
---
 backend/requirements.txt | 2 +-
 release-notes/0.9.4.md   | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 release-notes/0.9.4.md

diff --git a/backend/requirements.txt b/backend/requirements.txt
index 3d71973e..01f73f9c 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -5,7 +5,7 @@ json-logging>=1.2.6
 psycopg2-binary>=2.8.5
 
 # django
-Django == 3.1.1
+Django == 3.1.2
 django-model-utils>=4.0.0
 django-extra-fields>=3.0.0
 django-storages>=1.9.1

diff --git a/release-notes/0.9.4.md b/release-notes/0.9.4.md
new file mode 100644
index 00000000..5c2b6ac1
--- /dev/null
+++ b/release-notes/0.9.4.md
@@ -0,0 +1,9 @@
+# Release notes for pkdb 0.9.4
+
+## New features
+
+## Fixes
+- bugfix groups and individuals JSON button (#660)
+- security bugfix django (#665)
+
+## Deprecated features

From 40bf3f79346dacd91f1b239849eca186fd9acb68 Mon Sep 17 00:00:00 2001
From: Matthias Koenig
Date: Fri, 2 Oct 2020 10:58:51 +0200
Subject: [PATCH 09/29] Fix for plotly issues

---
 frontend/package.json | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/frontend/package.json b/frontend/package.json
index 3f247d2d..792b7f0c 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -13,10 +13,6 @@
     "acorn": "^7.4.0",
     "axios": "^0.19.2",
     "base-64": "^0.1.0",
-    "color-normalize": "1.5.0",
-    "color-rgba": "2.1.1",
-    "color-parse": "1.3.8",
-    "plotly.js": "1.55.2",
     "vega": "^5.16.1",
     "vega-embed": "^6.12.2",
     "vega-lite": "^4.16.7",

From d2ac34b5629182869551fd13f9fe1e8a3dcecd1c Mon Sep 17 00:00:00 2001
From: Jan Grzegorzewski
Date: Sun, 4 Oct 2020 20:36:24 +0200
Subject: [PATCH 10/29] better timecourse serialization
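
Timecourses are now serialized from the subset Elasticsearch document:
every SerializerMethodField needs the same per-subset DataFrame, so the
helper memoizes it with functools.lru_cache. lru_cache only accepts
hashable arguments, hence the document dict is round-tripped through
json.dumps before each call. A reduced sketch of the trick (toy payload;
the real document carries full output records):

    import json
    from functools import lru_cache

    import pandas as pd

    @lru_cache(maxsize=64)
    def frame_for(doc_json: str) -> pd.DataFrame:
        # doc_json is a canonical JSON string, i.e. a hashable cache key.
        points = [v["point"][0] for v in json.loads(doc_json)["array"]]
        return pd.DataFrame(points)

    doc = {"array": [{"point": [{"pk": 1, "time": 0.0}]},
                     {"point": [{"pk": 2, "time": 1.0}]}]}
    df1 = frame_for(json.dumps(doc))  # parsed and cached
    df2 = frame_for(json.dumps(doc))  # served from the cache
    assert df1 is df2

Note that caching on a method, as done in the patch, also keys on self
and keeps the serializer instance alive for the lifetime of the cache.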
---
 backend/pkdb_app/data/documents.py   |   6 +-
 backend/pkdb_app/data/models.py      |  11 +-
 backend/pkdb_app/data/serializers.py | 233 +++++++++++++++++++++------
 backend/pkdb_app/studies/views.py    |  12 +-
 4 files changed, 198 insertions(+), 64 deletions(-)

diff --git a/backend/pkdb_app/data/documents.py b/backend/pkdb_app/data/documents.py
index 77a312d0..409d8fb4 100644
--- a/backend/pkdb_app/data/documents.py
+++ b/backend/pkdb_app/data/documents.py
@@ -98,16 +98,18 @@ class SubSetDocument(Document):
     study = study_field
     study_sid = string_field('study_sid')
     study_name = string_field('study_name')
-    # for permissions
     access = string_field('access')
     allowed_users = fields.ObjectField(
         attr="allowed_users",
+
         properties={
             'username': string_field("username")
         },
         multi=True
     )
-
+    def get_queryset(self):
+        """Not mandatory but to improve performance we can select related in one sql request"""
+        return super(SubSetDocument, self).get_queryset().prefetch_related("data_points__outputs")
     class Django:
         model = SubSet
         # Ignore auto updating of Elasticsearch when a model is saved/deleted

diff --git a/backend/pkdb_app/data/models.py b/backend/pkdb_app/data/models.py
index 2d62a74d..f62d63e8 100644
--- a/backend/pkdb_app/data/models.py
+++ b/backend/pkdb_app/data/models.py
@@ -40,6 +40,8 @@ class DataTypes(models.TextChoices):
 class Timecourseable(models.Model):
     class Meta:
         abstract = True
+    def output_pk(self):
+        return self.data_points.values_list("outputs__pk")
 
     @cached_property
     def timecourse(self):
@@ -64,10 +66,11 @@ def reformat_timecourse(self, timecourse, mapping):
     @cached_property
     def timecourse_representation(self):
         """ FIXME: Documentation """
-        timecourse = self.merge_values(
-            self.data_points.values(*self.keys_timecourse_representation().values()), )
-        self.reformat_timecourse(timecourse, self.keys_timecourse_representation())
-        return timecourse
+        if self.data.data_type == Data.DataTypes.Timecourse:
+            timecourse = self.merge_values(
+                self.data_points.values(*self.keys_timecourse_representation().values()), )
+            self.reformat_timecourse(timecourse, self.keys_timecourse_representation())
+            return timecourse
 
     def timecourse_extra_no_intervention(self):
         return {

diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py
index 523d4e74..7dabf057 100644
--- a/backend/pkdb_app/data/serializers.py
+++ b/backend/pkdb_app/data/serializers.py
@@ -1,8 +1,11 @@
+import json
 import traceback
+from cachetools import cached
+from django_elasticsearch_dsl_drf.serializers import DocumentSerializer
 from pkdb_app.behaviours import MEASUREMENTTYPE_FIELDS
 from pkdb_app.comments.serializers import DescriptionSerializer, CommentSerializer, CommentElasticSerializer, \
     DescriptionElasticSerializer
+from pkdb_app.data.documents import SubSetDocument
 from pkdb_app.data.models import DataSet, Data, SubSet, Dimension, DataPoint
 from pkdb_app.outputs.models import Output
 from pkdb_app.outputs.pk_calculation import pkoutputs_from_timecourse
@@ -360,26 +365,196 @@ def create(self, validated_data):
 # Read Serializer
 ################################
 
+class TimecourseSerializer(serializers.Serializer):
+    study_sid = serializers.CharField()
+    study_name = serializers.CharField()
+    output_pk = serializers.SerializerMethodField()
+    subset_pk = serializers.IntegerField(source="pk")
+    subset_name = serializers.CharField(source="name")
+
+    interventions = serializers.SerializerMethodField()
+    group_pk = serializers.SerializerMethodField()
+    individual_pk = serializers.SerializerMethodField()
+    normed = serializers.SerializerMethodField()
+
+    tissue = serializers.SerializerMethodField()
+    tissue_label = serializers.SerializerMethodField()
+
+    method = serializers.SerializerMethodField()
+    method_label = serializers.SerializerMethodField()
+
+    label = serializers.SerializerMethodField()
+
+    time = serializers.SerializerMethodField()
+    time_unit = serializers.SerializerMethodField()
+
+    measurement_type = serializers.SerializerMethodField()
+    measurement_type_label = serializers.SerializerMethodField()
+    choice = serializers.SerializerMethodField()
+    choice_label = serializers.SerializerMethodField()
+
+    substance = serializers.SerializerMethodField()
+    substance_label = serializers.SerializerMethodField()
+
+    value = serializers.SerializerMethodField()
+    mean = serializers.SerializerMethodField()
+    median = serializers.SerializerMethodField()
+    min = serializers.SerializerMethodField()
+    max = serializers.SerializerMethodField()
+    sd = serializers.SerializerMethodField()
+    se = serializers.SerializerMethodField()
+    cv = serializers.SerializerMethodField()
+    unit = serializers.SerializerMethodField()
+
+    @lru_cache(maxsize=64)
+    def _get_general(self,obj):
+        obj = [v["point"][0] for v in json.loads(obj)["array"]]
+        result = pd.DataFrame(obj)
+
+        return result
+
+    def _get_field(self, obj, field):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result[field].isnull().all():
+            return None
+        return result[field].values
+
+    def get_output_pk(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["pk"].values
+
+    def get_interventions(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return [i["pk"] for i in result["interventions"].iloc[0]]
+
+    def get_group_pk(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["group"][0]:
+            return result["group"][0]["pk"]
+
+    def get_individual_pk(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["individual"][0]:
+            return result["individual"][0]["pk"]
+
+    def get_normed(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["normed"][0]
+
+    def get_tissue(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["tissue"][0]:
+            return result["tissue"][0]["sid"]
+
+    def get_tissue_label(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["tissue"][0]:
+            return result["tissue"][0]["label"]
+
+    def get_method(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["method"][0]:
+            return result["method"][0]["sid"]
+
+    def get_method_label(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["method"][0]:
+            return result["method"][0]["label"]
+
+    def get_label(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["label"][0]
+
+    def get_time(self, obj):
+        return self._get_field(obj, "time")
+
+    def get_time_unit(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["time_unit"][0]
+
+    def get_measurement_type(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["measurement_type"][0]["sid"]
+
+    def get_measurement_type_label(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["measurement_type"][0]["label"]
+
+    def get_choice(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["choice"][0]:
+            return result["choice"][0]["sid"]
+
+    def get_choice_label(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["choice"][0]:
+            return result["choice"][0]["label"]
+
+    def get_substance(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["substance"][0]:
+            return result["substance"][0]["sid"]
+
+    def get_substance_label(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        if result["substance"][0]:
+            return result["substance"][0]["label"]
+
+    def get_value(self, obj):
+        return self._get_field(obj, "value")
+
+    def get_mean(self, obj):
+        return self._get_field(obj, "mean")
+
+    def get_median(self, obj):
+        return self._get_field(obj, "median")
+
+    def get_min(self, obj):
+        return self._get_field(obj, "min")
+
+    def get_max(self, obj):
+        return self._get_field(obj, "max")
+
+    def get_sd(self, obj):
+        return self._get_field(obj, "sd")
+
+    def get_se(self, obj):
+        return self._get_field(obj, "se")
+
+    def get_cv(self, obj):
+        return self._get_field(obj, "cv")
+
+    def get_unit(self, obj):
+        result = self._get_general(json.dumps(obj.to_dict()))
+        return result["unit"][0]
+
+    class Meta:
+        fields = ["study_sid", "study_name", "output_pk", "intervention_pk", "group_pk", "individual_pk", "normed",
+                  "calculated"] + OUTPUT_FIELDS + MEASUREMENTTYPE_FIELDS
 
-class SubSetElasticSerializer(serializers.ModelSerializer):
+class SubSetElasticSerializer(DocumentSerializer):
     study = StudySmallElasticSerializer(read_only=True)
     name = serializers.CharField()
     data_type = serializers.CharField()
     array = serializers.SerializerMethodField()
 
+
     class Meta:
-        model = SubSet
-        fields = ["pk", "study",
+        document = SubSetDocument
+        fields = ["pk",
+                  "study",
                   "name",
                   "data_type",
-                  "array"]
+                  "array",
+                  "timecourse"]
 
     def get_array(self,object):
-        #return [[SmallOutputSerializer(point.point,many=True, read_only=True).data] for point in object["array"]]
         return [point["point"] for point in object.to_dict()["array"]]
 
+
 class DataSetElasticSmallSerializer(serializers.ModelSerializer):
     descriptions = DescriptionElasticSerializer(many=True, read_only=True)
     comments = CommentElasticSerializer(many=True, read_only=True)
@@ -409,49 +584,3 @@ class Meta:
 
         read_only_fields = fields
 
-
-class TimecourseSerializer(serializers.Serializer):
-    study_sid = serializers.CharField()
-    study_name = serializers.CharField()
-    output_pk = serializers.ListField(serializers.IntegerField())
-    subset_pk = serializers.IntegerField()
-    subset_name = serializers.CharField()
-    interventions = serializers.ListField(serializers.IntegerField())
-    group_pk = serializers.IntegerField()
-    individual_pk = serializers.IntegerField()
-    normed = serializers.BooleanField()
-    calculated = serializers.BooleanField()
-
-    tissue = serializers.CharField()
-    tissue_label = serializers.CharField()
-
-    method = serializers.CharField()
-    method_label = serializers.CharField()
-
-    label = serializers.CharField()
-    output_type = serializers.CharField()
-
-    time = serializers.FloatField()
-    time_unit = serializers.CharField()
-
-    measurement_type = serializers.CharField()
-    measurement_type__label = serializers.CharField()
-    choice = serializers.CharField()
-    choice_label = serializers.CharField()
-    substance = serializers.CharField()
-    substance_label = serializers.CharField()
-
-    value = serializers.ListField(serializers.IntegerField())
-    mean = serializers.ListField(serializers.IntegerField())
-    median = serializers.ListField(serializers.IntegerField())
-    min = serializers.ListField(serializers.IntegerField())
-    max = serializers.ListField(serializers.IntegerField())
-    sd = serializers.ListField(serializers.IntegerField())
-    se = serializers.ListField(serializers.IntegerField())
-    cv = serializers.ListField(serializers.IntegerField())
-    unit = serializers.CharField()
-
-    class Meta:
-        fields = ["study_sid", "study_name", "output_pk", "intervention_pk", "group_pk", "individual_pk", "normed",
-                  "calculated"] + OUTPUT_FIELDS + MEASUREMENTTYPE_FIELDS
\ No newline at end of file

diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py
index 803013f5..c2dbd584 100644
--- a/backend/pkdb_app/studies/views.py
+++ b/backend/pkdb_app/studies/views.py
@@ -2,7 +2,7 @@
 import tempfile
 import uuid
 import zipfile
-from collections import namedtuple, OrderedDict
+from collections import namedtuple
 from datetime import datetime
 from io import StringIO
 from typing import Dict
@@ -10,7 +10,6 @@
 import pandas as pd
 from django.db import connection
 from django.test.client import RequestFactory
-import numpy as np
 
 import django_filters.rest_framework
 from django.core.exceptions import ObjectDoesNotExist
@@ -28,10 +27,11 @@
 from elasticsearch_dsl.query import Q
 
 from pkdb_app.data.documents import DataAnalysisDocument, SubSetDocument
-from pkdb_app.data.models import SubSet, Data, DataPoint
-from pkdb_app.data.views import SubSetViewSet, DataAnalysisViewSet
-from pkdb_app.documents import AccessView, UUID_PARAM
-from pkdb_app.interventions.serializers import InterventionElasticSerializerAnalysis
+from pkdb_app.data.models import SubSet, Data
+from pkdb_app.data.serializers import TimecourseSerializer
+from pkdb_app.data.views import SubSetViewSet
+from pkdb_app.documents import UUID_PARAM
+from pkdb_app.interventions.serializers import InterventionElasticSerializerAnalysis
 from pkdb_app.outputs.serializers import OutputInterventionSerializer
 from pkdb_app.subjects.serializers import GroupCharacteristicaSerializer, IndividualCharacteristicaSerializer
 from rest_framework.generics import get_object_or_404
@@ -737,7 +737,7 @@ def serialize_timecourses(ids):
             "individuals": Sheet("Individuals", {"individual_pk": pkdata.ids["individuals"]}, IndividualCharacteristicaViewSet,IndividualCharacteristicaSerializer, None, True),
             "interventions": Sheet("Interventions", {"pk": pkdata.ids["interventions"]} ,ElasticInterventionAnalysisViewSet, InterventionElasticSerializerAnalysis, None, False),
             "outputs": Sheet("Outputs", {"output_pk": pkdata.ids["outputs"]}, OutputInterventionViewSet, OutputInterventionSerializer, None, True),
-            "timecourses": Sheet("Timecourses", {"subset_pk": pkdata.ids["timecourses"]}, None, None, serialize_timecourses, None),
+            "timecourses": Sheet("Timecourses", {"pk": pkdata.ids["timecourses"]}, SubSetViewSet, TimecourseSerializer, None, False),
             "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatters, None),
         }

From bf87fe527615bd1733e6fc8db3476ba4782cc78f Mon Sep 17 00:00:00 2001
From: Jan Grzegorzewski
Date: Tue, 6 Oct 2020 11:41:00 +0200
Subject: [PATCH 11/29] timecourse flat endpoint
A Subset is a collection of outputs which can be either a + timecourse or scatter. A timecourse subset consists of outputs measured at different time points. A scatter subset + contains correlated data which commonly are displayed as scatter plots. + """ + document = SubSetDocument + serializer_class = TimecourseSerializer + pagination_class = CustomPagination + lookup_field = "id" + filter_backends = [FilteringFilterBackend, IdsFilterBackend, MultiMatchSearchFilterBackend] + search_fields = ( + "name", + "data_type", + "study.sid", + "study.name", + "array.data_points.point.outputs.group.name", + "array.data_points.point.outputs.individual.name", + "array.data_points.point.outputs.interventions.name", + "array.data_points.point.outputs.measurement_type.label", + "array.data_points.point.outputs.choice.label", + "array.data_points.point.outputs.substance.label", + "array.data_points.point.outputs.tissue.label", + ) + multi_match_search_fields = {field: {"boost": 1} for field in search_fields} + multi_match_options = {'operator': 'and'} + filter_fields = { + "name": "name.raw", + "data_type": "data_type.raw" + } diff --git a/backend/pkdb_app/urls.py b/backend/pkdb_app/urls.py index 824c14e9..d0fe1506 100755 --- a/backend/pkdb_app/urls.py +++ b/backend/pkdb_app/urls.py @@ -4,7 +4,7 @@ from django.conf.urls import url from django.urls import path, include from drf_yasg.views import get_schema_view -from pkdb_app.data.views import DataAnalysisViewSet, SubSetViewSet +from pkdb_app.data.views import DataAnalysisViewSet, SubSetViewSet, TimecourseViewSet from rest_framework.authtoken.views import obtain_auth_token from rest_framework.routers import DefaultRouter @@ -88,6 +88,7 @@ router.register("flat/individuals", IndividualCharacteristicaViewSet, basename="individuals_analysis") router.register("flat/output", OutputInterventionViewSet, basename="output_analysis") router.register("flat/data", DataAnalysisViewSet, basename="data_analysis") +router.register("flat/timecourses", TimecourseViewSet, basename="timecourse_analysis") From e7c44c6fc0731a4ac1643edbf3293136b13d7058 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Tue, 6 Oct 2020 11:43:55 +0200 Subject: [PATCH 12/29] debug --- backend/pkdb_app/data/serializers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py index a57f5ebf..36c32bf3 100644 --- a/backend/pkdb_app/data/serializers.py +++ b/backend/pkdb_app/data/serializers.py @@ -409,15 +409,13 @@ class TimecourseSerializer(serializers.Serializer): def _get_general(self,obj): obj = [v["point"][0] for v in json.loads(obj)["array"]] result = pd.DataFrame(obj) - result.where(result.notnull(), None) - return result + return result.where(result.notnull(), None) def _get_field(self, obj, field): result = self._get_general(json.dumps(obj.to_dict())) if result[field].isnull().all(): return None - - return result[field].replac(.values + return result[field].values def get_output_pk(self, obj): result = self._get_general(json.dumps(obj.to_dict())) From e5237e660209d1c1c6d8985a724ecbd089c53d57 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Tue, 6 Oct 2020 11:54:37 +0200 Subject: [PATCH 13/29] flat studies ViewSet --- backend/pkdb_app/studies/views.py | 4 ++++ backend/pkdb_app/urls.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index c2dbd584..920c263e 100644 --- a/backend/pkdb_app/studies/views.py 
+++ b/backend/pkdb_app/studies/views.py @@ -336,6 +336,10 @@ def get_queryset(self): ) return qs +class StudyAnalysisViewSet(ElasticStudyViewSet): + swagger_schema = None + serializer_class = StudyAnalysisSerializer + class ElasticReferenceViewSet(BaseDocumentViewSet): """Read/query/search references. """ diff --git a/backend/pkdb_app/urls.py b/backend/pkdb_app/urls.py index d0fe1506..961cdaed 100755 --- a/backend/pkdb_app/urls.py +++ b/backend/pkdb_app/urls.py @@ -31,7 +31,7 @@ StudyViewSet, ElasticReferenceViewSet, ElasticStudyViewSet, - update_index_study, PKDataView, + update_index_study, PKDataView, StudyAnalysisViewSet, ) from .subjects.views import ( DataFileViewSet, @@ -83,6 +83,7 @@ router.register('_info_nodes', InfoNodeViewSet, basename="_info_nodes") # django +router.register("flat/studies", StudyAnalysisViewSet, basename="studies_analysis") router.register("flat/interventions", ElasticInterventionAnalysisViewSet, basename="interventions_analysis") router.register("flat/groups", GroupCharacteristicaViewSet, basename="groups_analysis") router.register("flat/individuals", IndividualCharacteristicaViewSet, basename="individuals_analysis") From 3fabe274afca74f72b5f497f01bb3f5b3665cc2a Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Tue, 6 Oct 2020 12:42:57 +0200 Subject: [PATCH 14/29] fix endpoint --- backend/pkdb_app/data/serializers.py | 2 ++ backend/pkdb_app/urls.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py index 36c32bf3..99cb0756 100644 --- a/backend/pkdb_app/data/serializers.py +++ b/backend/pkdb_app/data/serializers.py @@ -365,6 +365,7 @@ def create(self, validated_data): ################################ class TimecourseSerializer(serializers.Serializer): + """ Timecourse Serializer""" study_sid = serializers.CharField() study_name = serializers.CharField() output_pk = serializers.SerializerMethodField() @@ -407,6 +408,7 @@ class TimecourseSerializer(serializers.Serializer): @lru_cache(maxsize=128) def _get_general(self,obj): + """ This function reshapes and reformats the outputs to a Django DataFrame. 
""" obj = [v["point"][0] for v in json.loads(obj)["array"]] result = pd.DataFrame(obj) return result.where(result.notnull(), None) diff --git a/backend/pkdb_app/urls.py b/backend/pkdb_app/urls.py index 961cdaed..3ab564c6 100755 --- a/backend/pkdb_app/urls.py +++ b/backend/pkdb_app/urls.py @@ -87,7 +87,7 @@ router.register("flat/interventions", ElasticInterventionAnalysisViewSet, basename="interventions_analysis") router.register("flat/groups", GroupCharacteristicaViewSet, basename="groups_analysis") router.register("flat/individuals", IndividualCharacteristicaViewSet, basename="individuals_analysis") -router.register("flat/output", OutputInterventionViewSet, basename="output_analysis") +router.register("flat/outputs", OutputInterventionViewSet, basename="output_analysis") router.register("flat/data", DataAnalysisViewSet, basename="data_analysis") router.register("flat/timecourses", TimecourseViewSet, basename="timecourse_analysis") From 3b49985227082189b4733e06ffa933014ceb8bd3 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Tue, 6 Oct 2020 16:46:09 +0200 Subject: [PATCH 15/29] filter fields added --- backend/pkdb_app/data/serializers.py | 9 ++++++++- backend/pkdb_app/data/views.py | 23 +++++++++++++++++------ backend/pkdb_app/studies/views.py | 18 +++++++++++++++++- backend/pkdb_app/urls.py | 14 +++++++------- 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py index 99cb0756..27b359e2 100644 --- a/backend/pkdb_app/data/serializers.py +++ b/backend/pkdb_app/data/serializers.py @@ -1,5 +1,7 @@ import json import traceback + +from django.utils.functional import cached_property from django_elasticsearch_dsl_drf.serializers import DocumentSerializer from pkdb_app.behaviours import MEASUREMENTTYPE_FIELDS from pkdb_app.comments.serializers import DescriptionSerializer, CommentSerializer, CommentElasticSerializer, \ @@ -406,9 +408,14 @@ class TimecourseSerializer(serializers.Serializer): cv = serializers.SerializerMethodField() unit = serializers.SerializerMethodField() + #@cached_property + #def json_object(self): + # return json.dumps(self.instance.to_dict()) + @lru_cache(maxsize=128) def _get_general(self,obj): - """ This function reshapes and reformats the outputs to a Django DataFrame. """ + """ This function reshapes and reformats the outputs to a Pandas DataFrame. """ + obj = [v["point"][0] for v in json.loads(obj)["array"]] result = pd.DataFrame(obj) return result.where(result.notnull(), None) diff --git a/backend/pkdb_app/data/views.py b/backend/pkdb_app/data/views.py index 910b6dd3..8f1b3437 100644 --- a/backend/pkdb_app/data/views.py +++ b/backend/pkdb_app/data/views.py @@ -85,11 +85,9 @@ class SubSetViewSet(AccessView): "data_type": "data_type.raw" } class TimecourseViewSet(AccessView): - """ Endpoint to query Timecourses + """ Endpoint to query timecourses - The subets endpoint gives access to the subset data. A Subset is a collection of outputs which can be either a - timecourse or scatter. A timecourse subset consists of outputs measured at different time points. A scatter subset - contains correlated data which commonly are displayed as scatter plots. + The timecourses endpoints gives access to timecourses. 
""" document = SubSetDocument serializer_class = TimecourseSerializer @@ -112,6 +110,19 @@ class TimecourseViewSet(AccessView): multi_match_search_fields = {field: {"boost": 1} for field in search_fields} multi_match_options = {'operator': 'and'} filter_fields = { - "name": "name.raw", - "data_type": "data_type.raw" + 'study_sid': {'field': 'study_sid.raw', + 'lookups': [ + LOOKUP_QUERY_IN, + LOOKUP_QUERY_EXCLUDE, + + ], + }, + 'study_name': {'field': 'study_name.raw', + 'lookups': [ + LOOKUP_QUERY_IN, + LOOKUP_QUERY_EXCLUDE, + + ], + }, + } diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index 920c263e..b27d06b2 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -17,7 +17,7 @@ from django.http import JsonResponse, HttpResponse from django.utils.decorators import method_decorator from django.views.decorators.csrf import csrf_exempt -from django_elasticsearch_dsl_drf.constants import LOOKUP_QUERY_IN +from django_elasticsearch_dsl_drf.constants import LOOKUP_QUERY_IN, LOOKUP_QUERY_EXCLUDE from django_elasticsearch_dsl_drf.filter_backends import FilteringFilterBackend, \ OrderingFilterBackend, IdsFilterBackend, MultiMatchSearchFilterBackend, CompoundSearchFilterBackend from django_elasticsearch_dsl_drf.viewsets import BaseDocumentViewSet, DocumentViewSet @@ -339,6 +339,22 @@ def get_queryset(self): class StudyAnalysisViewSet(ElasticStudyViewSet): swagger_schema = None serializer_class = StudyAnalysisSerializer + filter_fields = { + 'study_sid': {'field': 'sid.raw', + 'lookups': [ + LOOKUP_QUERY_IN, + LOOKUP_QUERY_EXCLUDE, + + ], + }, + 'study_name': {'field': 'name.raw', + 'lookups': [ + LOOKUP_QUERY_IN, + LOOKUP_QUERY_EXCLUDE, + + ], + }, + } class ElasticReferenceViewSet(BaseDocumentViewSet): diff --git a/backend/pkdb_app/urls.py b/backend/pkdb_app/urls.py index 3ab564c6..e4a3abf7 100755 --- a/backend/pkdb_app/urls.py +++ b/backend/pkdb_app/urls.py @@ -83,13 +83,13 @@ router.register('_info_nodes', InfoNodeViewSet, basename="_info_nodes") # django -router.register("flat/studies", StudyAnalysisViewSet, basename="studies_analysis") -router.register("flat/interventions", ElasticInterventionAnalysisViewSet, basename="interventions_analysis") -router.register("flat/groups", GroupCharacteristicaViewSet, basename="groups_analysis") -router.register("flat/individuals", IndividualCharacteristicaViewSet, basename="individuals_analysis") -router.register("flat/outputs", OutputInterventionViewSet, basename="output_analysis") -router.register("flat/data", DataAnalysisViewSet, basename="data_analysis") -router.register("flat/timecourses", TimecourseViewSet, basename="timecourse_analysis") +router.register("pkdata/studies", StudyAnalysisViewSet, basename="studies_analysis") +router.register("pkdata/interventions", ElasticInterventionAnalysisViewSet, basename="interventions_analysis") +router.register("pkdata/groups", GroupCharacteristicaViewSet, basename="groups_analysis") +router.register("pkdata/individuals", IndividualCharacteristicaViewSet, basename="individuals_analysis") +router.register("pkdata/outputs", OutputInterventionViewSet, basename="output_analysis") +router.register("pkdata/data", DataAnalysisViewSet, basename="data_analysis") +router.register("pkdata/timecourses", TimecourseViewSet, basename="timecourse_analysis") From 74cc67485e08075d9c140266b1c7715ccd8fe468 Mon Sep 17 00:00:00 2001 From: Matthias Koenig Date: Thu, 8 Oct 2020 17:53:05 +0200 Subject: [PATCH 16/29] Fix #29, replace to symlinks --- backend/README.md | 
43 ------ backend/download_extra/README.md | 189 +------------------------ backend/download_extra/TERMS_OF_USE.md | 49 +------ backend/pkdb_app/studies/views.py | 4 - 4 files changed, 2 insertions(+), 283 deletions(-) delete mode 100644 backend/README.md mode change 100644 => 120000 backend/download_extra/README.md mode change 100644 => 120000 backend/download_extra/TERMS_OF_USE.md diff --git a/backend/README.md b/backend/README.md deleted file mode 100644 index 058507db..00000000 --- a/backend/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# PKDB Backend (`django`) - -- [ ] documentation page with queries and searches - -## PKData Query -The event cycle of PKData is: -1. Query studies, interventions, groups, individuals, and outputs by adding - the respective word as a prefix following two underscores to the url filter - (e.g. ...api/v1/pkdata/?studies__sid=PKDB00008 is equivalent to ...api/v1/studies/?sid=PKDB00008). - The search/filter is performed on the indexed database. For more details on how to construct the query by patterns in the - url check "https://django-elasticsearch-dsl-drf.readthedocs.io/en/latest/". - -2. All tables are updated to get rid of redundant entries. This results in a concise set of entries -in all tables (e.g. a filter on the study table for a specific sid reduces the entries of the other tables -only to interventions, groups, individuals, and outputs which are part of the study). - -3. paginated studies, interventions, groups, individuals, and outputs are returned. Getting the next page for one of the tables -works equivalently to the filters (e.g. getting the second studies page while searching for the interventions containing caffeine. ...api/v1/pkdata/?interventions__substance=caffeine&studies__page=2). - - -## PKDData -documentation - -### Queries - -Query for single study: -``` -http://localhost:8000/api/v1/pkdata/?studies__sid=PKDB00008 -``` -Query for multiple studies based on sids: -``` -http://localhost:8000/api/v1/pkdata/?studies__sid__in=PKDB00008__PKDB00001 -``` -Query for interventions substance: -``` -http://localhost:8000/api/v1/pkdata/?interventions__substance=codeine -``` -Query for interventions and outputs simultaneously: -``` -http://localhost:8000/api/v1/pkdata/?interventions__substance=codeine&outputs__measurement_type=clearance -``` - -© 2017-2020 Jan Grzegorzewski & Matthias König. 
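Patch 16 turns the bundled `README.md` and `TERMS_OF_USE.md` copies into symlinks; as the new file contents below show, each symlink points at its own name (`README.md -> README.md`), which cannot resolve, so the following patch restores regular files. For orientation, here is a rough, self-contained sketch of how the download endpoint assembles `pkdata.zip` from in-memory CSV buffers plus these extra files; the `sheets` dict is an illustrative placeholder, not the real PK-DB table content:

```python
import tempfile
import zipfile
from io import StringIO

import pandas as pd

# hypothetical stand-in for the per-table DataFrames of the real download
sheets = {"studies": pd.DataFrame({"sid": ["PKDB00008"]})}

with tempfile.SpooledTemporaryFile() as tmp:
    with zipfile.ZipFile(tmp, 'w', zipfile.ZIP_DEFLATED) as archive:
        for name, df in sheets.items():
            string_buffer = StringIO()
            df.to_csv(string_buffer)
            # each table is serialized in memory and stored as a CSV entry
            archive.writestr(f'{name}.csv', string_buffer.getvalue())
        # the extra documents are copied from disk into the zip root
        # (assumes download_extra/ exists relative to the working directory)
        archive.write('download_extra/README.md', 'README.md')
        archive.write('download_extra/TERMS_OF_USE.md', 'TERMS_OF_USE.md')
    tmp.seek(0)
    payload = tmp.read()  # bytes served as the pkdata.zip attachment
```

If `download_extra/README.md` is a symlink to itself, `archive.write` cannot read it and building the archive would fail, which motivates the revert in the next patch.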
diff --git a/backend/download_extra/README.md b/backend/download_extra/README.md deleted file mode 100644 index aa93c46c..00000000 --- a/backend/download_extra/README.md +++ /dev/null @@ -1,188 +0,0 @@ -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) -[![License (LGPL version 3)](https://img.shields.io/badge/license-LGPLv3.0-blue.svg?style=flat-square)](http://opensource.org/licenses/LGPL-3.0) - - Jan Grzegorzewski -and - Matthias König - -# PK-DB - a pharmacokinetics database - -* [Overview](https://github.com/matthiaskoenig/pkdb#overview) -* [How to cite](https://github.com/matthiaskoenig/pkdb#how-to-cite) -* [License](https://github.com/matthiaskoenig/pkdb#license) -* [Funding](https://github.com/matthiaskoenig/pkdb#funding) -* [Installation](https://github.com/matthiaskoenig/pkdb#installation) -* [REST API](https://github.com/matthiaskoenig/pkdb#rest-api) -* [Docker interaction](https://github.com/matthiaskoenig/pkdb#docker-interaction) - -## Overview -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -[PK-DB](https://pk-db.com) is a database and web interface for pharmacokinetics data and information from clinical trials -as well as pre-clinical research. PK-DB allows to curate pharmacokinetics data integrated with the -corresponding meta-information -- characteristics of studied patient collectives and individuals (age, bodyweight, smoking status, ...) -- applied interventions (e.g., dosing, substance, route of application) -- measured pharmacokinetics time courses and pharmacokinetics parameters (e.g., clearance, half-life, ...). - -Important features are -- the representation of experimental errors and variation -- the representation and normalisation of units -- annotation of information to biological ontologies -- calculation of pharmacokinetics information from time courses (apparent clearance, half-life, ...) -- a workflow for collaborative data curation -- strong validation rules on data, and simple access via a REST API - -PK-DB is available at https://pk-db.com - -## License -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -PK-DB code and documentation is licensed as -* Source Code: [LGPLv3](http://opensource.org/licenses/LGPL-3.0) -* Documentation: [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) - -## Funding -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -Jan Grzegorzewski and Matthias König are supported by the Federal Ministry of Education and Research (BMBF, Germany) -within the research network Systems Medicine of the Liver ([LiSyM](http://www.lisym.org/), grant number 031L0054). - -## How to cite -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -If you use PK-DB data or the web interface cite - -> *PK-DB: PharmacoKinetics DataBase for Individualized and Stratified Computational Modeling* -> Jan Grzegorzewski, Janosch Brandhorst, Dimitra Eleftheriadou, Kathleen Green, Matthias König -> bioRxiv 760884; doi: https://doi.org/10.1101/760884 - -If you use PK-DB code cite in addition - -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) - -## Installation -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -PK-DB is deployed via `docker` and `docker-compose`. 
- -### Requirements -To setup the development server -the following minimal requirements must be fulfilled -- `docker` -- `docker-compose` -- `Python3.6` - -For elasticsearch the following system settings are required -``` -sudo sysctl -w vm.max_map_count=262144 -``` -To set `vm.max_map_count` persistently change the value in -``` -/etc/sysctl.conf -``` -### Start development server -To start the local development server -```bash -# clone or pull the latest code -git clone https://github.com/matthiaskoenig/pkdb.git -cd pkdb -git pull - -# set environment variables -set -a && source .env.local - -# create/rebuild all docker containers -./docker-purge.sh -``` -This setups a clean database and clean volumes and starts the containers for `pkdb_backend`, `pkdb_frontend`, `elasticsearch` and `postgres`. -You can check that all the containers are running via -```bash -docker container ls -``` -which lists the current containers -``` -CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -bc7f9204468f pkdb_backend "bash -c '/usr/local…" 27 hours ago Up 18 hours 0.0.0.0:8000->8000/tcp pkdb_backend_1 -17b8d243e956 pkdb_frontend "/bin/sh -c 'npm run…" 27 hours ago Up 18 hours 0.0.0.0:8080->8080/tcp pkdb_frontend_1 -7730c6fe2210 elasticsearch:6.8.1 "/usr/local/bin/dock…" 27 hours ago Up 18 hours 9300/tcp, 0.0.0.0:9123->9200/tcp pkdb_elasticsearch_1 -e880fbb0f349 postgres:11.4 "docker-entrypoint.s…" 27 hours ago Up 18 hours 0.0.0.0:5433->5432/tcp pkdb_postgres_1 -``` -The locally running develop version of PK-DB can now be accessed via the web browser from -- frontend: http://localhost:8080 -- backend: http://localhost:8000 - -### Fill database -Due to copyright, licensing and privacy issues this repository does not contain any data. -All data is managed via a separate private repository at https://github.com/matthiaskoenig/pkdb_data. -This also includes the curation scripts and curation workflows. - -If you are interested in curating data or contributing data please contact us at https://livermetabolism.com. - - -## REST API -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -PKDB provides a REST API which allows simple interaction with the database and easy access of data. -An overview over the REST endpoints is provided at [`http://localhost:8000/api/v1/`](http://localhost:8000/api/v1/). - -### Query examples -The REST API supports elastisearch queries, with syntax examples -available [here](https://django-elasticsearch-dsl-drf.readthedocs.io/en/latest/basic_usage_examples.html) -* http://localhost:8000/api/v1/comments_elastic/?user_lastname=K%C3%B6nig -* http://localhost:8000/api/v1/characteristica_elastic/?group_pk=5&final=true -* http://localhost:8000/api/v1/characteristica_elastic/?search=group_name:female&final=true -* http://localhost:8000/api/v1/substances_elastic/?search:name=cod -* http://localhost:8000/api/v1/substances_elastic/?search=cod -* http://localhost:8000/api/v1/substances_elastic/?ids=1__2__3 -* http://localhost:8000/api/v1/substances_elastic/?ids=1__2__3&ordering=-name -* http://localhost:8000/api/v1/substances_elastic/?name=caffeine&name=acetaminophen - -### Suggestion example -In addition suggestion queries are possible -* http://localhost:8000/api/v1/substances_elastic/suggest/?search:name=cod - -## Docker interaction -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -In the following typical examples to interact with the PK-DB docker containers are provided. 
- -### Check running containers -To check the running containers use -```bash -watch docker container ls -``` - -### Interactive container mode -```bash -./docker-interactive.sh -``` - -### Container logs -To get access to individual container logs use `docker container logs `. For instance to check the -django backend logs use -```bash -docker container logs pkdb_backend_1 -``` - -### Run command in container -To run commands inside the docker container use -```bash -docker-compose run --rm backend [command] -``` -or to run migrations -```bash -docker-compose run --rm backend python manage.py makemigrations -``` - -### Authentication data -The following examples show how to dump and restore the authentication data. - -Dump authentication data -```bash -docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py dumpdata auth --indent 2 > ./backend/pkdb_app/fixtures/auth.json -docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py dumpdata users --indent 2 > ./backend/pkdb_app/fixtures/users.json -docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py dumpdata rest_email_auth --indent 2 > ./backend/pkdb_app/fixtures/rest_email_auth.json -``` - -Restore authentication data -```bash -docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py loaddata auth pkdb_app/fixtures/auth.json -docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py loaddata users pkdb_app/fixtures/users.json -docker-compose -f $PKDB_DOCKER_COMPOSE_YAML run --rm backend ./manage.py loaddata rest_email_auth pkdb_app/fixtures/rest_email_auth.json -``` - -© 2017-2020 Jan Grzegorzewski & Matthias König; https://livermetabolism.com. diff --git a/backend/download_extra/README.md b/backend/download_extra/README.md new file mode 120000 index 00000000..42061c01 --- /dev/null +++ b/backend/download_extra/README.md @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/backend/download_extra/TERMS_OF_USE.md b/backend/download_extra/TERMS_OF_USE.md deleted file mode 100644 index 42c131c3..00000000 --- a/backend/download_extra/TERMS_OF_USE.md +++ /dev/null @@ -1,48 +0,0 @@ -# PK-DB Terms of Use - -## General -1. PK-DB promotes open science through its mission to provide freely available online services, database and software relating to data contributed from life science experiments to the largest possible community. Where we present scientific data generated by others we impose no additional restriction on the use of the contributed data than those provided by the data owner. - -2. PK-DB expects attribution (e.g. in publications, services or products) for any of its online services, databases or software in accordance with good scientific practice. The expected attribution will be indicated on the appropriate web page. - -3. Any feedback provided to PK-DB on its online services will be treated as non-confidential unless the individual or organisation providing the feedback states otherwise. - -4. PK-DB is not liable to you or third parties claiming through you, for any loss or damage. - -5. All scientific data will be made available by a time and release mechanism consistent with the data type (e.g. human data where access needs to be reviewed by a Data Access Committee, pre-publication embargoed for a specific time period). - -6. Personal data held by PK-DB will only be released in exceptional circumstances when required by law or judicial or regulatory order. 
PK-DB may make information about the total volume of usage of particular software or data available to the public and third party organisations who supply the software or databases without details of any individual’s use. - -7. While we will retain our commitment to OpenScience, we reserve the right to update these Terms of Use at any time. When alterations are inevitable, we will attempt to give reasonable notice of any changes by placing a notice on our website, but you may wish to check each time you use the website. The date of the most recent revision will appear on this, the ‘PK-DB Terms of Use’ page. If you do not agree to these changes, please do not continue to use our online services. We will also make available an archived copy of the previous Terms of Use for comparison. - -8. Any questions or comments concerning these Terms of Use can be addressed to: Matthias König, PK-DB - - -## Online services -1. Users of PK-DB online services agree not to attempt to use any EMBL-EBI computers, files or networks apart from through the service interfaces provided. - -2. The PK-DB websites may use cookies to record information about your online preferences that allow us to personalise your experience of the website. You can control your use of cookies from your web browser, but if you choose not to accept cookies from PK-DB’s websites, you will not be able to take full advantage of all of the website’s features. - -3. PK-DB will make all reasonable effort to maintain continuity of these online services and provide adequate warning of any changes or discontinuities. However, PK-DB accepts no responsibility for the consequences of any temporary or permanent discontinuity in service. - -4. Any attempt to use PK-DB online services to a level that prevents, or looks likely to prevent, PK-DB providing services to others, will result in the use being blocked. PK-DB will attempt to contact the user to discuss their needs and how (and if) these can be met from other sources. - -5. If you post or send offensive, inappropriate or objectionable content anywhere on or to our websites or otherwise engage in any disruptive behaviour on any of our services, we may use your personal information from our security logs to stop such behaviour. Where we reasonably believe that you are or may be in breach of any applicable laws we may use your personal information to inform relevant third parties about the content and your behaviour. - -6. PK-DB has implemented appropriate technical and organisational measures to ensure a level of security which we deem appropriate, taking into account the categories of data we collect and the way we process it. - -7. PK-DB does not accept responsibility for the consequences of any breach of the confidentiality of PK-DB site by third parties. - -## Data services -1. The online data services and databases of PK-DB are generated in part from data contributed by the community who remain the data owners. - -2. When you contribute scientific data to a database through our website or other submission tools this information will be released at a time and in a manner consistent with the scientific data and we may store it permanently. - -3. PK-DB itself places no additional restrictions on the use or redistribution of the data available via its online services other than those provided by the original data owners. - -4. 
PK-DB does not guarantee the accuracy of any provided data, generated database, software or online service nor the suitability of databases, software and online services for any purpose. - -5. The original data may be subject to rights claimed by third parties, including but not limited to, patent, copyright, other intellectual property rights, biodiversity-related access and benefit-sharing rights. It is the responsibility of users of PK-DB services to ensure that their exploitation of the data does not infringe any of the rights of such third parties. - - -© 2017-2020 Jan Grzegorzewski & Matthias König; https://livermetabolism.com. \ No newline at end of file diff --git a/backend/download_extra/TERMS_OF_USE.md b/backend/download_extra/TERMS_OF_USE.md new file mode 120000 index 00000000..d4a6516c --- /dev/null +++ b/backend/download_extra/TERMS_OF_USE.md @@ -0,0 +1 @@ +TERMS_OF_USE.md \ No newline at end of file diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index b27d06b2..c467f433 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -812,13 +812,9 @@ def unique_or_sorted_list(v): download_times["timecourse"] = time.time()-download_time_start_timecourse """ - - archive.write('download_extra/README.md', 'README.md') archive.write('download_extra/TERMS_OF_USE.md', 'TERMS_OF_USE.md') - - tmp.seek(0) resp = HttpResponse(tmp.read(), content_type='application/x-zip-compressed') resp['Content-Disposition'] = "attachment; filename=%s" % "pkdata.zip" From 7e714bd4ba92daeb1d13068e8fc5ac7a1f9b1463 Mon Sep 17 00:00:00 2001 From: Matthias Koenig Date: Thu, 8 Oct 2020 18:33:25 +0200 Subject: [PATCH 17/29] updated download files --- backend/download_extra/README.md | 58 +++++++++++++++++++++++++- backend/download_extra/TERMS_OF_USE.md | 49 +++++++++++++++++++++- backend/pkdb_app/studies/views.py | 5 ++- 3 files changed, 108 insertions(+), 4 deletions(-) mode change 120000 => 100644 backend/download_extra/README.md mode change 120000 => 100644 backend/download_extra/TERMS_OF_USE.md diff --git a/backend/download_extra/README.md b/backend/download_extra/README.md deleted file mode 120000 index 42061c01..00000000 --- a/backend/download_extra/README.md +++ /dev/null @@ -1 +0,0 @@ -README.md \ No newline at end of file diff --git a/backend/download_extra/README.md b/backend/download_extra/README.md new file mode 100644 index 00000000..5af31e10 --- /dev/null +++ b/backend/download_extra/README.md @@ -0,0 +1,57 @@ +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) +[![License (LGPL version 3)](https://img.shields.io/badge/license-LGPLv3.0-blue.svg?style=flat-square)](http://opensource.org/licenses/LGPL-3.0) + + Jan Grzegorzewski +and + Matthias König + +# PK-DB - a pharmacokinetics database + +* [Overview](https://github.com/matthiaskoenig/pkdb#overview) +* [How to cite](https://github.com/matthiaskoenig/pkdb#how-to-cite) +* [License](https://github.com/matthiaskoenig/pkdb#license) +* [Funding](https://github.com/matthiaskoenig/pkdb#funding) + +## Overview +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +[PK-DB](https://pk-db.com) is a database and web interface for pharmacokinetics data and information from clinical trials +as well as pre-clinical research. 
PK-DB allows to curate pharmacokinetics data integrated with the +corresponding meta-information +- characteristics of studied patient collectives and individuals (age, bodyweight, smoking status, ...) +- applied interventions (e.g., dosing, substance, route of application) +- measured pharmacokinetics time courses and pharmacokinetics parameters (e.g., clearance, half-life, ...). + +Important features are +- the representation of experimental errors and variation +- the representation and normalisation of units +- annotation of information to biological ontologies +- calculation of pharmacokinetics information from time courses (apparent clearance, half-life, ...) +- a workflow for collaborative data curation +- strong validation rules on data, and simple access via a REST API + +PK-DB is available at https://pk-db.com + +## License +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +PK-DB code and documentation is licensed as +* Source Code: [LGPLv3](http://opensource.org/licenses/LGPL-3.0) +* Documentation: [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) + +## Funding +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +Jan Grzegorzewski and Matthias König are supported by the Federal Ministry of Education and Research (BMBF, Germany) +within the research network Systems Medicine of the Liver ([LiSyM](http://www.lisym.org/), grant number 031L0054). + +## How to cite +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +If you use PK-DB data or the web interface cite + +> *PK-DB: PharmacoKinetics DataBase for Individualized and Stratified Computational Modeling* +> Jan Grzegorzewski, Janosch Brandhorst, Dimitra Eleftheriadou, Kathleen Green, Matthias König +> bioRxiv 760884; doi: https://doi.org/10.1101/760884 + +If you use PK-DB code cite in addition + +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) + +© 2017-2020 Jan Grzegorzewski & Matthias König; https://livermetabolism.com. diff --git a/backend/download_extra/TERMS_OF_USE.md b/backend/download_extra/TERMS_OF_USE.md deleted file mode 120000 index d4a6516c..00000000 --- a/backend/download_extra/TERMS_OF_USE.md +++ /dev/null @@ -1 +0,0 @@ -TERMS_OF_USE.md \ No newline at end of file diff --git a/backend/download_extra/TERMS_OF_USE.md b/backend/download_extra/TERMS_OF_USE.md new file mode 100644 index 00000000..42c131c3 --- /dev/null +++ b/backend/download_extra/TERMS_OF_USE.md @@ -0,0 +1,48 @@ +# PK-DB Terms of Use + +## General +1. PK-DB promotes open science through its mission to provide freely available online services, database and software relating to data contributed from life science experiments to the largest possible community. Where we present scientific data generated by others we impose no additional restriction on the use of the contributed data than those provided by the data owner. + +2. PK-DB expects attribution (e.g. in publications, services or products) for any of its online services, databases or software in accordance with good scientific practice. The expected attribution will be indicated on the appropriate web page. + +3. Any feedback provided to PK-DB on its online services will be treated as non-confidential unless the individual or organisation providing the feedback states otherwise. + +4. PK-DB is not liable to you or third parties claiming through you, for any loss or damage. + +5. 
All scientific data will be made available by a time and release mechanism consistent with the data type (e.g. human data where access needs to be reviewed by a Data Access Committee, pre-publication embargoed for a specific time period). + +6. Personal data held by PK-DB will only be released in exceptional circumstances when required by law or judicial or regulatory order. PK-DB may make information about the total volume of usage of particular software or data available to the public and third party organisations who supply the software or databases without details of any individual’s use. + +7. While we will retain our commitment to OpenScience, we reserve the right to update these Terms of Use at any time. When alterations are inevitable, we will attempt to give reasonable notice of any changes by placing a notice on our website, but you may wish to check each time you use the website. The date of the most recent revision will appear on this, the ‘PK-DB Terms of Use’ page. If you do not agree to these changes, please do not continue to use our online services. We will also make available an archived copy of the previous Terms of Use for comparison. + +8. Any questions or comments concerning these Terms of Use can be addressed to: Matthias König, PK-DB + + +## Online services +1. Users of PK-DB online services agree not to attempt to use any EMBL-EBI computers, files or networks apart from through the service interfaces provided. + +2. The PK-DB websites may use cookies to record information about your online preferences that allow us to personalise your experience of the website. You can control your use of cookies from your web browser, but if you choose not to accept cookies from PK-DB’s websites, you will not be able to take full advantage of all of the website’s features. + +3. PK-DB will make all reasonable effort to maintain continuity of these online services and provide adequate warning of any changes or discontinuities. However, PK-DB accepts no responsibility for the consequences of any temporary or permanent discontinuity in service. + +4. Any attempt to use PK-DB online services to a level that prevents, or looks likely to prevent, PK-DB providing services to others, will result in the use being blocked. PK-DB will attempt to contact the user to discuss their needs and how (and if) these can be met from other sources. + +5. If you post or send offensive, inappropriate or objectionable content anywhere on or to our websites or otherwise engage in any disruptive behaviour on any of our services, we may use your personal information from our security logs to stop such behaviour. Where we reasonably believe that you are or may be in breach of any applicable laws we may use your personal information to inform relevant third parties about the content and your behaviour. + +6. PK-DB has implemented appropriate technical and organisational measures to ensure a level of security which we deem appropriate, taking into account the categories of data we collect and the way we process it. + +7. PK-DB does not accept responsibility for the consequences of any breach of the confidentiality of PK-DB site by third parties. + +## Data services +1. The online data services and databases of PK-DB are generated in part from data contributed by the community who remain the data owners. + +2. 
When you contribute scientific data to a database through our website or other submission tools this information will be released at a time and in a manner consistent with the scientific data and we may store it permanently. + +3. PK-DB itself places no additional restrictions on the use or redistribution of the data available via its online services other than those provided by the original data owners. + +4. PK-DB does not guarantee the accuracy of any provided data, generated database, software or online service nor the suitability of databases, software and online services for any purpose. + +5. The original data may be subject to rights claimed by third parties, including but not limited to, patent, copyright, other intellectual property rights, biodiversity-related access and benefit-sharing rights. It is the responsibility of users of PK-DB services to ensure that their exploitation of the data does not infringe any of the rights of such third parties. + + +© 2017-2020 Jan Grzegorzewski & Matthias König; https://livermetabolism.com. \ No newline at end of file diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index c467f433..fd83d867 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -5,6 +5,7 @@ from collections import namedtuple from datetime import datetime from io import StringIO +from pathlib import Path from typing import Dict import time import pandas as pd @@ -761,13 +762,13 @@ def serialize_timecourses(ids): "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatters, None), } - + # Create archive with tempfile.SpooledTemporaryFile() as tmp: with zipfile.ZipFile(tmp, 'w', zipfile.ZIP_DEFLATED) as archive: download_times = {} for key, sheet in table_content.items(): - download_time_start = time.time() + download_time_start = time.time() string_buffer = StringIO() if sheet.function: From 2ea5b56617fb25e945519cff56e5a9c1f507e0e9 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Fri, 9 Oct 2020 01:00:27 +0200 Subject: [PATCH 18/29] closes #670 --- backend/pkdb_app/data/serializers.py | 78 ++++++++++++------------- backend/pkdb_app/studies/serializers.py | 32 +++++----- backend/pkdb_app/studies/views.py | 5 +- backend/pkdb_app/subjects/documents.py | 4 ++ 4 files changed, 59 insertions(+), 60 deletions(-) diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py index 27b359e2..51c68b75 100644 --- a/backend/pkdb_app/data/serializers.py +++ b/backend/pkdb_app/data/serializers.py @@ -47,7 +47,7 @@ class SubSetSerializer(ExSerializer): class Meta: model = SubSet - fields = ['name', "descriptions", "comments", "dimensions", "shared"] + fields = ['name', "descriptions", "comments", "dimensions", "shared"] def to_internal_value(self, data): self.validate_wrong_keys(data) @@ -97,7 +97,7 @@ def calculate_pks_from_timecourses(self, subset): outputs_dj = create_multiple_bulk(subset, "subset", outputs, Output) - for intervention, output in zip(interventions,outputs_dj): + for intervention, output in zip(interventions, outputs_dj): output.interventions.add(*intervention) if outputs_dj: @@ -107,7 +107,7 @@ def calculate_pks_from_timecourses(self, subset): subset.save() @staticmethod - def _add_id_to_foreign_keys(value:str): + def _add_id_to_foreign_keys(value: str): if value in OUTPUT_FOREIGN_KEYS: return value + "_id" else: @@ -135,19 +135,20 @@ def create_scatter(self, dimensions, shared, subset_instance): data_set = 
outputs_pd[outputs_pd['label'].isin(dimensions)] if len(data_set) == 0: raise serializers.ValidationError( - {"data_set":{"data":[{"subsets":{"dimensions":f"Outputs with label <{dimensions}> do not exist."}}]}}) + {"data_set": { + "data": [{"subsets": {"dimensions": f"Outputs with label <{dimensions}> do not exist."}}]}}) data_set["dimension"] = None - data_set.loc[data_set['label'] == dimensions[0],'dimension'] = 0 - data_set.loc[data_set['label'] == dimensions[1],'dimension'] = 1 + data_set.loc[data_set['label'] == dimensions[0], 'dimension'] = 0 + data_set.loc[data_set['label'] == dimensions[1], 'dimension'] = 1 shared_reformated = [] for shared_field in shared: shared_field_reformated = self._add_id_to_foreign_keys(shared_field) if shared_field_reformated not in data_set: - p_options = [self._remove_id_to_foreign_keys(c) for c in data_set.columns] - raise serializers.ValidationError(f"Shared_field <{shared_field}> not in outputs fields. Options are <{p_options}>") + raise serializers.ValidationError( + f"Shared_field <{shared_field}> not in outputs fields. Options are <{p_options}>") shared_reformated.append(shared_field_reformated) if len(data_set.groupby(shared_reformated)) == 0: @@ -210,7 +211,7 @@ def create_timecourse(self, subset_instance, dimensions): for output in subset_outputs.iterator(): data_point_instance = DataPoint.objects.create(subset=subset_instance) - dimension = Dimension(dimension=0, study=study, output=output,data_point=data_point_instance) + dimension = Dimension(dimension=0, study=study, output=output, data_point=data_point_instance) dimensions.append(dimension) Dimension.objects.bulk_create(dimensions) @@ -218,7 +219,6 @@ def create_timecourse(self, subset_instance, dimensions): class DataSerializer(ExSerializer): - comments = CommentSerializer( many=True, read_only=False, required=False, allow_null=True ) @@ -256,13 +256,12 @@ def create(self, validated_data): for subset in poped_data["subsets"]: subset_instance, poped_data = _create(model_serializer=SubSetSerializer(context=self.context), - validated_data={**subset, "data": data_instance}, - create_multiple_keys=['comments', 'descriptions']) + validated_data={**subset, "data": data_instance}, + create_multiple_keys=['comments', 'descriptions']) return data_instance class DataSetSerializer(ExSerializer): - data = DataSerializer(many=True, read_only=False, required=False, allow_null=True) comments = CommentSerializer( many=True, read_only=False, required=False, allow_null=True @@ -312,21 +311,29 @@ def to_internal_value(self, data): ) data_single['subsets'] = temp_subsets data_container.extend(self.entries_from_file(data_single)) + self.validate_no_timeocourses(data_container) autogenerate_timecourses = self.autogenerate_timecourses() if autogenerate_timecourses: - data_container.append(self.autogenerate_timecourses()) + data_container.append(autogenerate_timecourses) data['data'] = data_container return super().to_internal_value(data) + def validate_no_timeocourses(self, data): + for data_single in data: + if data_single.get("data_type") == Data.DataTypes.Timecourse: + raise serializers.ValidationError("timecourses are not allowed to be definied explictly in dataset. 
" + "Timecourses are created automatically by adding label " + "and output_type='timecourse' to the respective outputs.") + + def autogenerate_timecourses(self): - #Study = apps.get_model('studies', 'Study') + # Study = apps.get_model('studies', 'Study') study_sid = self.context["request"].path.split("/")[-2] outputs = Output.objects.filter(study__sid=study_sid, normed=True, output_type=Output.OutputTypes.Timecourse) - timecourse_labels = outputs.values_list("label",flat=True).distinct() + timecourse_labels = outputs.values_list("label", flat=True).distinct() if len(timecourse_labels) > 0: - auto_generated_data = { "name": "AutoGenerate", "data_type": "timecourse", @@ -335,18 +342,15 @@ def autogenerate_timecourses(self): } return auto_generated_data - def validate(self, attrs): self._validate_unique_names(attrs["data"]) return super().validate(attrs) - - def create(self, validated_data): dataset_instance, poped_data = _create(model_manager=self.Meta.model.objects, - validated_data=validated_data, - create_multiple_keys=['comments', 'descriptions'], - pop=['data']) + validated_data=validated_data, + create_multiple_keys=['comments', 'descriptions'], + pop=['data']) data_instance_container = [] for data_single in poped_data['data']: data_single["dataset"] = dataset_instance @@ -374,7 +378,7 @@ class TimecourseSerializer(serializers.Serializer): subset_pk = serializers.IntegerField(source="pk") subset_name = serializers.CharField(source="name") - interventions = serializers.SerializerMethodField() + interventions = serializers.SerializerMethodField() group_pk = serializers.SerializerMethodField() individual_pk = serializers.SerializerMethodField() normed = serializers.SerializerMethodField() @@ -390,10 +394,10 @@ class TimecourseSerializer(serializers.Serializer): time = serializers.SerializerMethodField() time_unit = serializers.SerializerMethodField() - measurement_type =serializers.SerializerMethodField() - measurement_type_label =serializers.SerializerMethodField() + measurement_type = serializers.SerializerMethodField() + measurement_type_label = serializers.SerializerMethodField() choice = serializers.SerializerMethodField() - choice_label =serializers.SerializerMethodField() + choice_label = serializers.SerializerMethodField() substance = serializers.SerializerMethodField() substance_label = serializers.SerializerMethodField() @@ -408,12 +412,12 @@ class TimecourseSerializer(serializers.Serializer): cv = serializers.SerializerMethodField() unit = serializers.SerializerMethodField() - #@cached_property - #def json_object(self): + # @cached_property + # def json_object(self): # return json.dumps(self.instance.to_dict()) @lru_cache(maxsize=128) - def _get_general(self,obj): + def _get_general(self, obj): """ This function reshapes and reformats the outputs to a Pandas DataFrame. 
""" obj = [v["point"][0] for v in json.loads(obj)["array"]] @@ -424,11 +428,11 @@ def _get_field(self, obj, field): result = self._get_general(json.dumps(obj.to_dict())) if result[field].isnull().all(): return None - return result[field].values + return list(result[field].values) def get_output_pk(self, obj): result = self._get_general(json.dumps(obj.to_dict())) - return result["pk"].values + return self._get_field(obj, "pk") def get_interventions(self, obj): result = self._get_general(json.dumps(obj.to_dict())) @@ -448,7 +452,6 @@ def get_normed(self, obj): result = self._get_general(json.dumps(obj.to_dict())) return result["normed"][0] - def get_tissue(self, obj): result = self._get_general(json.dumps(obj.to_dict())) if result["tissue"][0]: @@ -476,7 +479,6 @@ def get_label(self, obj): def get_time(self, obj): return self._get_field(obj, "time") - def get_time_unit(self, obj): result = self._get_general(json.dumps(obj.to_dict())) return result["time_unit"][0] @@ -541,13 +543,13 @@ class Meta: fields = ["study_sid", "study_name", "output_pk", "intervention_pk", "group_pk", "individual_pk", "normed", "calculated"] + OUTPUT_FIELDS + MEASUREMENTTYPE_FIELDS + class SubSetElasticSerializer(DocumentSerializer): study = StudySmallElasticSerializer(read_only=True) name = serializers.CharField() data_type = serializers.CharField() array = serializers.SerializerMethodField() - class Meta: document = SubSetDocument fields = ["pk", @@ -557,11 +559,10 @@ class Meta: "array", "timecourse"] - def get_array(self,object): + def get_array(self, object): return [point["point"] for point in object.to_dict()["array"]] - class DataSetElasticSmallSerializer(serializers.ModelSerializer): descriptions = DescriptionElasticSerializer(many=True, read_only=True) comments = CommentElasticSerializer(many=True, read_only=True) @@ -575,6 +576,7 @@ class Meta: def get_subsets(self, obj): return list_of_pk("subsets", obj) + class DataAnalysisSerializer(serializers.ModelSerializer): class Meta: model = Dimension @@ -589,5 +591,3 @@ class Meta: "output_pk", "dimension"] read_only_fields = fields - - diff --git a/backend/pkdb_app/studies/serializers.py b/backend/pkdb_app/studies/serializers.py index 50f8d4b3..c990abdd 100644 --- a/backend/pkdb_app/studies/serializers.py +++ b/backend/pkdb_app/studies/serializers.py @@ -8,7 +8,6 @@ from pkdb_app.data.serializers import DataSetSerializer, DataSetElasticSmallSerializer from rest_framework import serializers - from pkdb_app import utils from pkdb_app.outputs.models import OutputSet from pkdb_app.outputs.serializers import OutputSetSerializer, OutputSetElasticSmallSerializer @@ -194,11 +193,11 @@ def to_internal_value(self, data): data["creator"] = self.get_or_val_error(User, username=creator) # curators to internal - if hasattr(data,"curators"): - if len(data.get("curators",[])) == 0: - raise serializers.ValidationError( - {"curators": "At least One curator is required"} - ) + if hasattr(data, "curators"): + if len(data.get("curators", [])) == 0: + raise serializers.ValidationError( + {"curators": "At least One curator is required"} + ) else: ratings = [] for curator_and_rating in data.get("curators", []): @@ -314,6 +313,7 @@ def related_sets(): ("dataset", DataSet), ] ) + def related_serializer(self): return OrderedDict( [ @@ -340,9 +340,10 @@ def pop_relations(self, validated_data): "collaborators": User, "files": DataFile, } - related_foreignkeys_dict = OrderedDict([(name, validated_data.pop(name, None)) for name in related_foreignkeys.keys()]) + related_foreignkeys_dict = 
OrderedDict( + [(name, validated_data.pop(name, None)) for name in related_foreignkeys.keys()]) related_many2many_dict = OrderedDict([(name, validated_data.pop(name)) for name in related_many2many.keys() if - name in validated_data]) + name in validated_data]) related = OrderedDict(list(related_foreignkeys_dict.items()) + list(related_many2many_dict.items())) return related @@ -362,16 +363,13 @@ def create_relations(self, study, related): if getattr(study, name): getattr(study, name).delete() - this_serializer = serializer(context=context) instance = this_serializer.create(validated_data={**related[name]}) - setattr(study, name, instance) study.save() - if "curators" in related: if related["curators"]: @@ -404,7 +402,6 @@ def create_relations(self, study, related): study.save() - return study def validate(self, attrs): @@ -415,7 +412,7 @@ def validate(self, attrs): else: if attrs.get("date", None) is not None: _validate_not_allowed_key(attrs, "date", extra_message="For a study without a '^PKDB\d+$' identifier " - "the date must not be set in the study.json.") + "the date must not be set in the study.json.") if "curators" in attrs and "creator" in attrs: if attrs["creator"] not in [curator["user"] for curator in attrs["curators"]]: @@ -517,8 +514,8 @@ class StudyElasticSerializer(serializers.ModelSerializer): name = serializers.CharField(help_text="Name of the study. The convention is to deduce the name from the " "refererence with the following pattern " - "'[Author][PublicationYear][A-Z(optional)]'." ) - licence = serializers.CharField(help_text="Licence",) + "'[Author][PublicationYear][A-Z(optional)]'.") + licence = serializers.CharField(help_text="Licence", ) access = serializers.CharField() curators = CuratorRatingElasticSerializer(many=True, ) @@ -595,9 +592,10 @@ def get_files(self, obj): else: return [] + class StudyAnalysisSerializer(serializers.Serializer): sid = serializers.CharField() - name= serializers.CharField() + name = serializers.CharField() licence = serializers.CharField() access = serializers.CharField() date = serializers.DateField() @@ -610,8 +608,6 @@ class StudyAnalysisSerializer(serializers.Serializer): reference_title = serializers.SerializerMethodField() reference_date = serializers.DateField() - - def get_substances(self, obj): return [s["label"] for s in obj.substances] diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index b27d06b2..f21f098c 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -485,7 +485,7 @@ def __init__(self, timecourses = set() scatters = set() - for output in self.outputs.values("study_id","group_id", "individual_id", "id", "interventions__id", "subset__id", "output_type"): + for output in self.outputs.values("study_id", "group_id", "individual_id", "id", "interventions__id", "subset__id", "output_type"): studies.add(output["study_id"]) if output["group_id"]: groups.add(output["group_id"]) @@ -496,7 +496,7 @@ def __init__(self, if output["interventions__id"]: interventions.add(output["interventions__id"]) - if (output["subset__id"] is not None) & (output["output_type"] == Output.OutputTypes.Timecourse): + if output["output_type"] == Output.OutputTypes.Timecourse: timecourses.add(output["subset__id"]) if (output["subset__id"] is not None) & (output["output_type"] == Output.OutputTypes.Array): @@ -510,7 +510,6 @@ def __init__(self, "outputs": list(outputs), "timecourses": list(timecourses), "scatters": list(scatters), - } else: diff --git 
a/backend/pkdb_app/subjects/documents.py b/backend/pkdb_app/subjects/documents.py index dca8aa30..d8159c7f 100644 --- a/backend/pkdb_app/subjects/documents.py +++ b/backend/pkdb_app/subjects/documents.py @@ -76,6 +76,8 @@ class Django: class Index: name = 'individuals' settings = elastic_settings + settings['max_result_window'] = 100000 + # ------------------------------------ @@ -117,6 +119,8 @@ class Django: class Index: name = 'groups' settings = elastic_settings + settings['max_result_window'] = 100000 + def get_queryset(self): """Not mandatory but to improve performance we can select related in one sql request""" From 578aab328b5c5d6670795fad6f98e307bed4aaac Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Fri, 9 Oct 2020 10:42:50 +0200 Subject: [PATCH 19/29] minor diff --- backend/pkdb_app/studies/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index b9f5d516..c03d5ac2 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -497,7 +497,7 @@ def __init__(self, if output["interventions__id"]: interventions.add(output["interventions__id"]) - if output["output_type"] == Output.OutputTypes.Timecourse: + if (output["subset__id"] is not None) & (output["output_type"] == Output.OutputTypes.Timecourse): timecourses.add(output["subset__id"]) if (output["subset__id"] is not None) & (output["output_type"] == Output.OutputTypes.Array): From 5420d2703c9aed259ca5b86173bb631b897748a7 Mon Sep 17 00:00:00 2001 From: Matthias Koenig Date: Mon, 12 Oct 2020 14:24:21 +0200 Subject: [PATCH 20/29] bugfix README.md --- backend/README.md | 1 + backend/setup.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 backend/README.md diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 00000000..23bf695a --- /dev/null +++ b/backend/README.md @@ -0,0 +1 @@ +# PKDB \ No newline at end of file diff --git a/backend/setup.py b/backend/setup.py index 6607d443..52a0c33d 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- encoding: utf-8 -*- """ pkdb_app pip package """ From 66cfb205a78be1d0ac467273309045b5b539f9a5 Mon Sep 17 00:00:00 2001 From: Matthias Koenig Date: Mon, 12 Oct 2020 15:14:29 +0200 Subject: [PATCH 21/29] small fixes --- backend/pkdb_app/documents.py | 2 +- backend/pkdb_app/studies/documents.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/pkdb_app/documents.py b/backend/pkdb_app/documents.py index 31717974..a3daed5c 100644 --- a/backend/pkdb_app/documents.py +++ b/backend/pkdb_app/documents.py @@ -17,7 +17,7 @@ 'number_of_shards': 1, 'number_of_replicas': 1, 'max_ngram_diff': 15, - 'max_terms_count':65536*4, + 'max_terms_count': 65536*4, } edge_ngram_filter = token_filter( diff --git a/backend/pkdb_app/studies/documents.py b/backend/pkdb_app/studies/documents.py index 8b17546f..003a9412 100644 --- a/backend/pkdb_app/studies/documents.py +++ b/backend/pkdb_app/studies/documents.py @@ -196,7 +196,6 @@ class StudyDocument(Document): ) dataset = common_setfields("subsets") - class Django: model = Study # Ignore auto updating of Elasticsearch when a model is saved/deleted From cf741d416574ca33d7cbddbfa17f25ce845fa1e3 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Fri, 16 Oct 2020 12:48:37 +0200 Subject: [PATCH 22/29] closes #671 --- backend/pkdb_app/data/serializers.py | 4 +- backend/pkdb_app/documents.py | 2 +- backend/pkdb_app/outputs/documents.py 
| 4 +- backend/pkdb_app/studies/views.py | 132 +++++++++++-------------- backend/pkdb_app/subjects/documents.py | 4 +- backend/setup.py | 2 +- 6 files changed, 63 insertions(+), 85 deletions(-) diff --git a/backend/pkdb_app/data/serializers.py b/backend/pkdb_app/data/serializers.py index 51c68b75..345ad3a7 100644 --- a/backend/pkdb_app/data/serializers.py +++ b/backend/pkdb_app/data/serializers.py @@ -378,7 +378,7 @@ class TimecourseSerializer(serializers.Serializer): subset_pk = serializers.IntegerField(source="pk") subset_name = serializers.CharField(source="name") - interventions = serializers.SerializerMethodField() + intervention_pk = serializers.SerializerMethodField() group_pk = serializers.SerializerMethodField() individual_pk = serializers.SerializerMethodField() normed = serializers.SerializerMethodField() @@ -434,7 +434,7 @@ def get_output_pk(self, obj): result = self._get_general(json.dumps(obj.to_dict())) return self._get_field(obj, "pk") - def get_interventions(self, obj): + def get_intervention_pk(self, obj): result = self._get_general(json.dumps(obj.to_dict())) return [i["pk"] for i in result["interventions"].iloc[0]] diff --git a/backend/pkdb_app/documents.py b/backend/pkdb_app/documents.py index 31717974..a3daed5c 100644 --- a/backend/pkdb_app/documents.py +++ b/backend/pkdb_app/documents.py @@ -17,7 +17,7 @@ 'number_of_shards': 1, 'number_of_replicas': 1, 'max_ngram_diff': 15, - 'max_terms_count':65536*4, + 'max_terms_count': 65536*4, } edge_ngram_filter = token_filter( diff --git a/backend/pkdb_app/outputs/documents.py b/backend/pkdb_app/outputs/documents.py index 529f98c9..8df2988c 100644 --- a/backend/pkdb_app/outputs/documents.py +++ b/backend/pkdb_app/outputs/documents.py @@ -75,7 +75,7 @@ class Index: settings = elastic_settings settings['number_of_shards'] = 5 settings['number_of_replicas'] = 1 - settings['max_result_window'] = 100000 + settings['max_result_window'] = 500000 @registry.register_document @@ -132,7 +132,7 @@ class Index: settings = elastic_settings settings['number_of_shards'] = 5 settings['number_of_replicas'] = 1 - settings['max_result_window'] = 100000 + settings['max_result_window'] = 500000 def get_queryset(self): diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index c03d5ac2..c72ebb06 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -1,4 +1,3 @@ - import tempfile import uuid import zipfile @@ -31,7 +30,7 @@ from pkdb_app.data.models import SubSet, Data from pkdb_app.data.serializers import TimecourseSerializer from pkdb_app.data.views import SubSetViewSet -from pkdb_app.documents import UUID_PARAM +from pkdb_app.documents import UUID_PARAM from pkdb_app.interventions.serializers import InterventionElasticSerializerAnalysis from pkdb_app.outputs.serializers import OutputInterventionSerializer from pkdb_app.subjects.serializers import GroupCharacteristicaSerializer, IndividualCharacteristicaSerializer @@ -226,6 +225,7 @@ def related_elastic_dict(study): docs_dict[ReferenceDocument] = study.reference return docs_dict + @method_decorator(name='list', decorator=swagger_auto_schema(manual_parameters=[UUID_PARAM])) class ElasticStudyViewSet(BaseDocumentViewSet, APIView): """ Endpoint to query studies @@ -337,23 +337,24 @@ def get_queryset(self): ) return qs + class StudyAnalysisViewSet(ElasticStudyViewSet): swagger_schema = None serializer_class = StudyAnalysisSerializer filter_fields = { 'study_sid': {'field': 'sid.raw', - 'lookups': [ - LOOKUP_QUERY_IN, - 
LOOKUP_QUERY_EXCLUDE, + 'lookups': [ + LOOKUP_QUERY_IN, + LOOKUP_QUERY_EXCLUDE, - ], + ], }, 'study_name': {'field': 'name.raw', - 'lookups': [ - LOOKUP_QUERY_IN, - LOOKUP_QUERY_EXCLUDE, + 'lookups': [ + LOOKUP_QUERY_IN, + LOOKUP_QUERY_EXCLUDE, - ], + ], }, } @@ -367,7 +368,8 @@ class ElasticReferenceViewSet(BaseDocumentViewSet): pagination_class = CustomPagination permission_classes = (IsAdminOrCreatorOrCurator,) serializer_class = ReferenceElasticSerializer - filter_backends = [FilteringFilterBackend, IdsFilterBackend, OrderingFilterBackend, CompoundSearchFilterBackend, MultiMatchSearchFilterBackend] + filter_backends = [FilteringFilterBackend, IdsFilterBackend, OrderingFilterBackend, CompoundSearchFilterBackend, + MultiMatchSearchFilterBackend] search_fields = ( 'sid', 'pmid', @@ -399,6 +401,7 @@ class ElasticReferenceViewSet(BaseDocumentViewSet): class PKData(object): """ PKData represents a consistent set of pharmacokinetic data. """ + def __init__(self, request, concise: bool = True, @@ -415,7 +418,6 @@ def __init__(self, self.request = request - time_init = time.time() self.outputs = Output.objects.filter(normed=True).select_related("study__sid").prefetch_related( @@ -424,7 +426,6 @@ def __init__(self, queryset=Intervention.objects.only('id'))).only( 'group_id', 'individual_id', "id", "interventions__id", "subset__id", "output_type") - # --- Elastic --- if studies_query: self.studies_query = studies_query @@ -433,7 +434,7 @@ def __init__(self, self.outputs = self.outputs.filter(study_id__in=studies_pks) else: - studies_pks = StudyViewSet.filter_on_permissions(request.user,Study.objects).values_list("id", flat=True) + studies_pks = StudyViewSet.filter_on_permissions(request.user, Study.objects).values_list("id", flat=True) self.outputs = self.outputs.filter(study_id__in=Subquery(studies_pks)) self.studies = Study.objects.filter(id__in=studies_pks) @@ -448,11 +449,10 @@ def __init__(self, time_elastic_individuals = time.time() if concise: self.outputs = self.outputs.filter( - DQ(group_id__in=groups_pks) | DQ(individual_id__in=individuals_pks)) + DQ(group_id__in=groups_pks) | DQ(individual_id__in=individuals_pks)) else: - self.studies = self.studies.filter(DQ(groups__id__in=groups_pks) | DQ(individuals__id__in=individuals_pks)) - - + self.studies = self.studies.filter( + DQ(groups__id__in=groups_pks) | DQ(individuals__id__in=individuals_pks)) if interventions_query: self.interventions_query = {"normed": "true", **interventions_query} @@ -473,7 +473,6 @@ def __init__(self, self.studies = self.studies.filter(outputs__id__in=outputs_pks) - time_elastic = time.time() time_loop_start = time.time() @@ -486,7 +485,8 @@ def __init__(self, timecourses = set() scatters = set() - for output in self.outputs.values("study_id", "group_id", "individual_id", "id", "interventions__id", "subset__id", "output_type"): + for output in self.outputs.values("study_id", "group_id", "individual_id", "id", "interventions__id", + "subset__id", "output_type"): studies.add(output["study_id"]) if output["group_id"]: groups.add(output["group_id"]) @@ -528,8 +528,10 @@ def __init__(self, "individuals": list(self.individuals.values_list("pk", flat=True)), "interventions": list(self.interventions.values_list("pk", flat=True)), "outputs": list(self.outputs.values_list("pk", flat=True)), - "timecourses": list(self.subset.filter(data__data_type=Data.DataTypes.Timecourse).values_list("pk", flat=True)), - "scatters": list(self.subset.filter(data__data_type=Data.DataTypes.Scatter).values_list("pk", flat=True)), + 
"timecourses": list( + self.subset.filter(data__data_type=Data.DataTypes.Timecourse).values_list("pk", flat=True)), + "scatters": list( + self.subset.filter(data__data_type=Data.DataTypes.Scatter).values_list("pk", flat=True)), } time_loop_end = time.time() @@ -542,7 +544,7 @@ def __init__(self, print("init:", time_init - time_start) print("elastic:", time_elastic - time_init) print("django:", time_django - time_elastic) - print("Loop:", time_loop_end- time_loop_start) + print("Loop:", time_loop_end - time_loop_start) print("-" * 80) @@ -560,7 +562,7 @@ def individual_pks(self): return self._pks(view_class=IndividualViewSet, query_dict=self.individuals_query) def output_pks(self): - return self._pks(view_class=ElasticOutputViewSet, query_dict=self.outputs_query,scan_size=20000) + return self._pks(view_class=ElasticOutputViewSet, query_dict=self.outputs_query, scan_size=20000) def subset_pks(self): return self._pks(view_class=SubSetViewSet, query_dict=self.subsets_query) @@ -568,7 +570,7 @@ def subset_pks(self): def study_pks(self): return self._pks(view_class=ElasticStudyViewSet, query_dict=self.studies_query, pk_field="pk") - def set_request_get(self, query_dict:Dict): + def set_request_get(self, query_dict: Dict): """ :param query_dict: @@ -579,7 +581,7 @@ def set_request_get(self, query_dict:Dict): get[k] = v self.request._request.GET = get - def _pks(self, view_class: DocumentViewSet, query_dict: Dict, pk_field: str="pk", scan_size=10000): + def _pks(self, view_class: DocumentViewSet, query_dict: Dict, pk_field: str = "pk", scan_size=10000): """ query elastic search for pks. """ @@ -590,11 +592,11 @@ def _pks(self, view_class: DocumentViewSet, query_dict: Dict, pk_field: str="pk" response = queryset.source([pk_field]).params(size=scan_size).scan() return [instance[pk_field] for instance in response] - def data_by_query_dict(self,query_dict, viewset, serializer, boost): + def data_by_query_dict(self, query_dict, viewset, serializer, boost): view = viewset(request=self.request) queryset = view.filter_queryset(view.get_queryset()) if boost: - queryset=queryset.filter("terms", **query_dict).source(serializer.Meta.fields) + queryset = queryset.filter("terms", **query_dict).source(serializer.Meta.fields) return [hit.to_dict() for hit in queryset.params(size=5000).scan()] else: @@ -603,21 +605,21 @@ def data_by_query_dict(self,query_dict, viewset, serializer, boost): return serializer(queryset.params(size=5000).scan(), many=True).data - - class ResponseSerializer(serializers.Serializer): """Documentation of response schema.""" uuid = serializers.UUIDField( required=True, allow_null=False, help_text="The resulting queries can be accessed by adding this uuid as " - "an argument to the endpoints: /studies/, /groups/, /individuals/, /outputs/, /timecourses/, /subsets/." + "an argument to the endpoints: /studies/, /groups/, /individuals/, /outputs/, /timecourses/, /subsets/." 
) studies = serializers.IntegerField(required=True, allow_null=False, help_text="Number of resulting studies.") groups = serializers.IntegerField(required=True, allow_null=False, help_text="Number of resulting groups.") - individuals = serializers.IntegerField(required=True, allow_null=False, help_text="Number of resulting individuals.") + individuals = serializers.IntegerField(required=True, allow_null=False, + help_text="Number of resulting individuals.") outputs = serializers.IntegerField(required=True, allow_null=False, help_text="Number of resulting outputs.") - timecourses = serializers.IntegerField(required=True, allow_null=False, help_text="Number of resulting timecourses.") + timecourses = serializers.IntegerField(required=True, allow_null=False, + help_text="Number of resulting timecourses.") scatters = serializers.IntegerField(required=True, allow_null=False, help_text="Number of resulting scatters.") @@ -707,7 +709,6 @@ def _get_param(self, key, request): } ) - def get(self, request, *args, **kw): time_start_request = time.time() @@ -740,25 +741,28 @@ def get(self, request, *args, **kw): if request.GET.get("download") == "true": - - def serialize_scatters(ids): scatter_subsets = SubSet.objects.filter(id__in=ids) return [t.scatter_representation for t in scatter_subsets] - def serialize_timecourses(ids): - scatter_subsets = SubSet.objects.filter(id__in=ids) - return [t.timecourse_representation for t in scatter_subsets] - - Sheet = namedtuple("Sheet", ["sheet_name", "query_dict", "viewset", "serializer", "function", "boost_performance",]) + Sheet = namedtuple("Sheet", + ["sheet_name", "query_dict", "viewset", "serializer", "function", "boost_performance", ]) table_content = { - "studies": Sheet("Studies", {"pk": pkdata.ids["studies"]}, ElasticStudyViewSet, StudyAnalysisSerializer, None, False), - "groups": Sheet("Groups", {"group_pk": pkdata.ids["groups"]}, GroupCharacteristicaViewSet, GroupCharacteristicaSerializer, None, True,), - "individuals": Sheet("Individuals", {"individual_pk": pkdata.ids["individuals"]}, IndividualCharacteristicaViewSet,IndividualCharacteristicaSerializer, None, True), - "interventions": Sheet("Interventions", {"pk": pkdata.ids["interventions"]} ,ElasticInterventionAnalysisViewSet, InterventionElasticSerializerAnalysis, None, False), - "outputs": Sheet("Outputs", {"output_pk": pkdata.ids["outputs"]}, OutputInterventionViewSet, OutputInterventionSerializer,None, True), - "timecourses": Sheet("Timecourses", {"pk": pkdata.ids["timecourses"]}, SubSetViewSet, TimecourseSerializer, None, False), - "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatters, None), + "studies": Sheet("Studies", {"pk": pkdata.ids["studies"]}, ElasticStudyViewSet, StudyAnalysisSerializer, + None, False), + "groups": Sheet("Groups", {"group_pk": pkdata.ids["groups"]}, GroupCharacteristicaViewSet, + GroupCharacteristicaSerializer, None, True, ), + "individuals": Sheet("Individuals", {"individual_pk": pkdata.ids["individuals"]}, + IndividualCharacteristicaViewSet, IndividualCharacteristicaSerializer, None, True), + "interventions": Sheet("Interventions", {"pk": pkdata.ids["interventions"]}, + ElasticInterventionAnalysisViewSet, InterventionElasticSerializerAnalysis, None, + False), + "outputs": Sheet("Outputs", {"output_pk": pkdata.ids["outputs"]}, OutputInterventionViewSet, + OutputInterventionSerializer, None, True), + "timecourses": Sheet("Timecourses", {"pk": pkdata.ids["timecourses"]}, SubSetViewSet, + TimecourseSerializer, None, False), 
+ "scatters": Sheet("Scatter", {"subset_pk": pkdata.ids["scatters"]}, None, None, serialize_scatters, + None), } # Create archive @@ -777,40 +781,14 @@ def serialize_timecourses(ids): download_times[key] = time.time() - download_time_start else: - df = pd.DataFrame(pkdata.data_by_query_dict(sheet.query_dict,sheet.viewset,sheet.serializer, sheet.boost_performance)) + df = pd.DataFrame( + pkdata.data_by_query_dict(sheet.query_dict, sheet.viewset, sheet.serializer, + sheet.boost_performance)) if len(df) < 0: df = df[sheet.serializer.Meta.fields] df.to_csv(string_buffer) archive.writestr(f'{key}.csv', string_buffer.getvalue()) download_times[key] = time.time() - download_time_start - """ - if key == "outputs": - string_buffer = StringIO() - download_time_start_timecourse = time.time() - def sorted_tuple(v): - return sorted(tuple(v)) - timecourse_df = df[df["output_type"] == Output.OutputTypes.Timecourse] - - def unique_or_sorted_list(v): - values = v.unique() - if len(values) == 1: - return values[0] - return tuple(values) - - if len(timecourse_df) !=0: - #timecourse_df = pd.pivot_table(data=timecourse_df,index=["output_pk"], aggfunc=sorted_tuple, dropna=False).apply(SubSet.to_list) - #timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc=tuple, dropna=False).apply(SubSet.to_list) - timecourse_df = pd.pivot_table(data=timecourse_df, index=["output_pk"],aggfunc=unique_or_sorted_list,fill_value=np.NAN)#.reset_index() - timecourse_df = pd.pivot_table(data=timecourse_df,index=["label","study_name"], aggfunc= unique_or_sorted_list, fill_value=np.NAN)#.reset_index() - print(timecourse_df.columns) - - #timecourse_df = timecourse_df[table_content["outputs"].serializer.Meta.fields] - else: - timecourse_df = pd.DataFrame([]) - timecourse_df.to_csv(string_buffer) - archive.writestr('timecourse.csv', string_buffer.getvalue()) - download_times["timecourse"] = time.time()-download_time_start_timecourse - """ archive.write('download_extra/README.md', 'README.md') archive.write('download_extra/TERMS_OF_USE.md', 'TERMS_OF_USE.md') diff --git a/backend/pkdb_app/subjects/documents.py b/backend/pkdb_app/subjects/documents.py index d8159c7f..1070942d 100644 --- a/backend/pkdb_app/subjects/documents.py +++ b/backend/pkdb_app/subjects/documents.py @@ -200,7 +200,7 @@ class Django: class Index: name = "group_characteristica" - settings = {**elastic_settings, 'max_result_window': 50000} + settings = {**elastic_settings, 'max_result_window': 100000} def get_queryset(self): """Not mandatory but to improve performance we can select related in one sql request""" @@ -279,7 +279,7 @@ class Django: class Index: name = "individual_characteristica" - settings = {**elastic_settings, 'max_result_window': 50000} + settings = {**elastic_settings, 'max_result_window': 100000} def get_queryset(self): """Not mandatory but to improve performance we can select related in one sql request""" diff --git a/backend/setup.py b/backend/setup.py index 6607d443..4708dbf8 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -43,7 +43,7 @@ def read(*names, **kwargs): raise RuntimeError("Unable to find version string") # description from markdown -long_description = read('README.md') +long_description = read('download_extra/README.md') setup_kwargs['long_description'] = long_description setup( From d5e3e828dac3418be7a51f7fe85749b032712694 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Fri, 16 Oct 2020 13:57:40 +0200 Subject: [PATCH 23/29] consistent naming of scatter fields --- 
backend/pkdb_app/data/models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/pkdb_app/data/models.py b/backend/pkdb_app/data/models.py index f62d63e8..1d9616d9 100644 --- a/backend/pkdb_app/data/models.py +++ b/backend/pkdb_app/data/models.py @@ -37,6 +37,7 @@ class DataTypes(models.TextChoices): image = models.ForeignKey('subjects.DataFile', related_name="data", on_delete=models.CASCADE, null=True) dataset = models.ForeignKey(DataSet, related_name="data", on_delete=models.CASCADE, null=True) + class Timecourseable(models.Model): class Meta: abstract = True @@ -59,7 +60,7 @@ def reformat_timecourse(self, timecourse, mapping): """ FIXME: Documentation & type hinting """ for new_key, old_key in mapping.items(): timecourse[new_key] = timecourse.pop(old_key) - if new_key == "interventions": + if new_key == "intervention_pk": if isinstance(timecourse[new_key], int): timecourse[new_key] = (timecourse[new_key],) @@ -103,7 +104,7 @@ def keys_timecourse_representation(self): "outputs_pk": "outputs__pk", "subset_pk": "subset_id", "subset_name": "subset__name", - "interventions": "outputs__interventions__pk", + "intervention_pk": "outputs__interventions__pk", "group_pk": "outputs__group_id", "individual_pk": "outputs__individual_id", "normed": 'outputs__normed', From a4b0289a41397fc230f9b143ae46fff81f6ad150 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Fri, 16 Oct 2020 16:33:15 +0200 Subject: [PATCH 24/29] removed newly introduced bug --- backend/pkdb_app/data/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pkdb_app/data/models.py b/backend/pkdb_app/data/models.py index 1d9616d9..676d9038 100644 --- a/backend/pkdb_app/data/models.py +++ b/backend/pkdb_app/data/models.py @@ -60,7 +60,7 @@ def reformat_timecourse(self, timecourse, mapping): """ FIXME: Documentation & type hinting """ for new_key, old_key in mapping.items(): timecourse[new_key] = timecourse.pop(old_key) - if new_key == "intervention_pk": + if new_key in ["intervention_pk", "interventions"]: if isinstance(timecourse[new_key], int): timecourse[new_key] = (timecourse[new_key],) From 87f32d8ecc1203b9966d5866f38b0e989b7d0fb5 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Mon, 19 Oct 2020 14:49:21 +0200 Subject: [PATCH 25/29] unify serializer timecourses and outputs --- backend/pkdb_app/outputs/documents.py | 10 ++++++++ backend/pkdb_app/outputs/models.py | 33 +++++++++++++++++++++---- backend/pkdb_app/outputs/serializers.py | 14 +++++++++-- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/backend/pkdb_app/outputs/documents.py b/backend/pkdb_app/outputs/documents.py index 8df2988c..d1dc4711 100644 --- a/backend/pkdb_app/outputs/documents.py +++ b/backend/pkdb_app/outputs/documents.py @@ -90,15 +90,25 @@ class OutputInterventionDocument(Document): label = string_field('label') output_type = string_field('output_type') measurement_type = string_field("measurement_type") + measurement_type_label = string_field("measurement_type_label") + substance = string_field("substance") + substance_label = string_field("substance_label") + normed = fields.BooleanField() calculated = fields.BooleanField() method = string_field('method') + method_label = string_field('method_label') + tissue = string_field('tissue') + tissue_label = string_field('tissue_label') + time = fields.FloatField('time') time_unit = string_field('time_unit') unit = string_field('unit') choice = string_field('choice') + choice_label = string_field('choice_label') + # output fields value 
= fields.FloatField('value') diff --git a/backend/pkdb_app/outputs/models.py b/backend/pkdb_app/outputs/models.py index 0f409ade..0dd85740 100644 --- a/backend/pkdb_app/outputs/models.py +++ b/backend/pkdb_app/outputs/models.py @@ -278,22 +278,40 @@ def time_unit(self): @property def tissue(self): if self.output.tissue: - return self.output.tissue.info_node.name + return self.output.tissue.info_node.sid + @property + def tissue_label(self): + if self.output.tissue: + return self.output.tissue.info_node.label @property def method(self): if self.output.method: - return self.output.method.info_node.name + return self.output.method.info_node.sid + + @property + def method_label(self): + if self.output.method: + return self.output.method.info_node.label @property def measurement_type(self): - return self.output.measurement_type.info_node.name + return self.output.measurement_type.info_node.sid + + @property + def measurement_type_label(self): + return self.output.measurement_type.info_node.label @property def choice(self): if self.output.choice: - return self.output.choice.info_node.name + return self.output.choice.info_node.sid + + @property + def choice_label(self): + if self.output.choice: + return self.output.choice.info_node.label @property def label(self): @@ -302,7 +320,12 @@ def label(self): @property def substance(self): if self.output.substance: - return self.output.substance.info_node.name + return self.output.substance.info_node.sid + + @property + def substance_label(self): + if self.output.substance: + return self.output.substance.info_node.label @property def normed(self): diff --git a/backend/pkdb_app/outputs/serializers.py b/backend/pkdb_app/outputs/serializers.py index ee8e61aa..f1c0dbdc 100644 --- a/backend/pkdb_app/outputs/serializers.py +++ b/backend/pkdb_app/outputs/serializers.py @@ -325,16 +325,26 @@ class OutputInterventionSerializer(serializers.Serializer): calculated = serializers.BooleanField() tissue = serializers.CharField() + tissue_label = serializers.CharField() + method = serializers.CharField() + method_label = serializers.CharField() + label = serializers.CharField() output_type = serializers.CharField() time = serializers.FloatField() time_unit = serializers.CharField() - measurement_type =serializers.CharField() + measurement_type = serializers.CharField() + measurement_type_label = serializers.CharField() + choice = serializers.CharField() - substance =serializers.CharField() + choice_label = serializers.CharField() + + substance = serializers.CharField() + substance_label = serializers.CharField() + value = serializers.FloatField() mean = serializers.FloatField() From 4ed6bc0d465d278b4c73eddb815149e1ec820b45 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Mon, 19 Oct 2020 18:59:49 +0200 Subject: [PATCH 26/29] better serialization of interventions analysis --- backend/pkdb_app/interventions/serializers.py | 71 +++++++++++++++++-- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/backend/pkdb_app/interventions/serializers.py b/backend/pkdb_app/interventions/serializers.py index accd1c2a..ecd889df 100644 --- a/backend/pkdb_app/interventions/serializers.py +++ b/backend/pkdb_app/interventions/serializers.py @@ -304,15 +304,26 @@ class InterventionElasticSerializerAnalysis(serializers.Serializer): normed = serializers.BooleanField() name = serializers.CharField() - route = serializers.CharField(source="route_name",) - form = serializers.CharField(source="form_name",) - application = serializers.CharField(source="application_name",) + route = 
serializers.SerializerMethodField() + route_label = serializers.SerializerMethodField() + + form = serializers.SerializerMethodField() + form_label = serializers.SerializerMethodField() + + application = serializers.SerializerMethodField() + application_label = serializers.SerializerMethodField() + time = serializers.FloatField() time_end = serializers.FloatField() time_unit = serializers.CharField() - measurement_type = serializers.CharField(source="measurement_type_name",) - choice = serializers.CharField(source="choice_name") - substance = serializers.CharField(source="substance_name", ) + measurement_type = serializers.SerializerMethodField() + measurement_type_label = serializers.SerializerMethodField() + + choice = serializers.SerializerMethodField() + choice_label =serializers.SerializerMethodField() + + substance = serializers.SerializerMethodField() + substance_label = serializers.SerializerMethodField() value = serializers.FloatField() mean = serializers.FloatField() @@ -324,6 +335,54 @@ class InterventionElasticSerializerAnalysis(serializers.Serializer): cv = serializers.FloatField() unit = serializers.CharField() + def get_choice(self, obj): + if obj.choice: + return obj.choice.sid + + def get_choice_label(self, obj): + if obj.choice: + return obj.choice.label + + def get_route(self, obj): + if obj.route: + return obj.route.sid + + def get_route_label(self, obj): + if obj.route: + return obj.route.label + + def get_form(self, obj): + if obj.form: + return obj.form.sid + + def get_form_label(self, obj): + if obj.form: + return obj.form.label + + def get_application(self, obj): + if obj.application: + return obj.application.sid + + def get_application_label(self, obj): + if obj.application: + return obj.application.label + + def get_measurement_type(self, obj): + if obj.measurement_type: + return obj.measurement_type.sid + + def get_measurement_type_label(self, obj): + if obj.measurement_type: + return obj.measurement_type.label + + def get_substance(self, obj): + if obj.substance: + return obj.substance.sid + + def get_substance_label(self, obj): + if obj.substance: + return obj.substance.label + class Meta: fields = ["study_sid", "study_name", "intervention_pk", "raw_pk", "normed"] + INTERVENTION_FIELDS + MEASUREMENTTYPE_FIELDS From fa95536c921da7165431317e4a9b95a782fd2986 Mon Sep 17 00:00:00 2001 From: Jan Grzegorzewski Date: Wed, 21 Oct 2020 16:41:15 +0200 Subject: [PATCH 27/29] nothing new --- backend/pkdb_app/studies/views.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/pkdb_app/studies/views.py b/backend/pkdb_app/studies/views.py index c72ebb06..b0bf088f 100644 --- a/backend/pkdb_app/studies/views.py +++ b/backend/pkdb_app/studies/views.py @@ -4,7 +4,6 @@ from collections import namedtuple from datetime import datetime from io import StringIO -from pathlib import Path from typing import Dict import time import pandas as pd From b8f205ff51df31587216d1ce1c20705e3d8c1ca5 Mon Sep 17 00:00:00 2001 From: Matthias Koenig Date: Tue, 3 Nov 2020 11:28:36 +0100 Subject: [PATCH 28/29] Fix #679, updated django --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 01f73f9c..71c0caaa 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -5,7 +5,7 @@ json-logging>=1.2.6 psycopg2-binary>=2.8.5 # django -Django == 3.1.2 +Django == 3.1.3 django-model-utils>=4.0.0 django-extra-fields>=3.0.0 django-storages>=1.9.1 From 53ca65ea1e34cc98ae953887545fd2f12a6ebb2a Mon Sep 
17 00:00:00 2001 From: Matthias Koenig Date: Sun, 8 Nov 2020 17:57:47 +0100 Subject: [PATCH 29/29] added publication information --- README.md | 27 +++++++++++++++----------- backend/download_extra/README.md | 23 +++++++++++----------- backend/pkdb_app/_version.py | 2 +- frontend/src/components/Home.vue | 20 +++++++++++++++++++ frontend/src/components/home/About.vue | 5 +++-- release-notes/0.9.4.md | 4 +++- 6 files changed, 55 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 5af31e10..f2628421 100644 --- a/README.md +++ b/README.md @@ -31,17 +31,6 @@ Important features are PK-DB is available at https://pk-db.com -## License -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -PK-DB code and documentation is licensed as -* Source Code: [LGPLv3](http://opensource.org/licenses/LGPL-3.0) -* Documentation: [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) - -## Funding -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -Jan Grzegorzewski and Matthias König are supported by the Federal Ministry of Education and Research (BMBF, Germany) -within the research network Systems Medicine of the Liver ([LiSyM](http://www.lisym.org/), grant number 031L0054). - ## How to cite [[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) If you use PK-DB data or the web interface cite @@ -50,8 +39,24 @@ If you use PK-DB data or the web interface cite > Jan Grzegorzewski, Janosch Brandhorst, Dimitra Eleftheriadou, Kathleen Green, Matthias König > bioRxiv 760884; doi: https://doi.org/10.1101/760884 +> Grzegorzewski J, Brandhorst J, Green K, Eleftheriadou D, Duport Y, Barthorscht F, Köller A, Ke DYJ, De Angelis S, König M. +> *PK-DB: pharmacokinetics database for individualized and stratified computational modeling*. +> Nucleic Acids Res. 2020 Nov 5:gkaa990. doi: [10.1093/nar/gkaa990](https://doi.org/10.1093/nar/gkaa990). Epub ahead of print. PMID: [33151297](https://pubmed.ncbi.nlm.nih.gov/33151297/). + If you use PK-DB code cite in addition [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) +## License +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +PK-DB code and documentation is licensed as +* Source Code: [LGPLv3](http://opensource.org/licenses/LGPL-3.0) +* Documentation: [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) + +## Funding +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +Jan Grzegorzewski and Matthias König are supported by the Federal Ministry of Education and Research (BMBF, Germany) +within the research network Systems Medicine of the Liver ([LiSyM](http://www.lisym.org/), grant number 031L0054). + + © 2017-2020 Jan Grzegorzewski & Matthias König; https://livermetabolism.com. diff --git a/backend/download_extra/README.md b/backend/download_extra/README.md index 5af31e10..48c58947 100644 --- a/backend/download_extra/README.md +++ b/backend/download_extra/README.md @@ -31,6 +31,18 @@ Important features are PK-DB is available at https://pk-db.com +## How to cite +[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) +If you use PK-DB data or the web interface cite + +> Grzegorzewski J, Brandhorst J, Green K, Eleftheriadou D, Duport Y, Barthorscht F, Köller A, Ke DYJ, De Angelis S, König M. +> *PK-DB: pharmacokinetics database for individualized and stratified computational modeling*. +> Nucleic Acids Res. 
2020 Nov 5:gkaa990. doi: [10.1093/nar/gkaa990](https://doi.org/10.1093/nar/gkaa990). Epub ahead of print. PMID: [33151297](https://pubmed.ncbi.nlm.nih.gov/33151297/). + +If you use PK-DB code cite in addition + +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) + ## License [[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) PK-DB code and documentation is licensed as @@ -42,16 +54,5 @@ PK-DB code and documentation is licensed as Jan Grzegorzewski and Matthias König are supported by the Federal Ministry of Education and Research (BMBF, Germany) within the research network Systems Medicine of the Liver ([LiSyM](http://www.lisym.org/), grant number 031L0054). -## How to cite -[[^]](https://github.com/matthiaskoenig/pkdb#pk-db---a-pharmacokinetics-database) -If you use PK-DB data or the web interface cite - -> *PK-DB: PharmacoKinetics DataBase for Individualized and Stratified Computational Modeling* -> Jan Grzegorzewski, Janosch Brandhorst, Dimitra Eleftheriadou, Kathleen Green, Matthias König -> bioRxiv 760884; doi: https://doi.org/10.1101/760884 - -If you use PK-DB code cite in addition - -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1406979.svg)](https://doi.org/10.5281/zenodo.1406979) © 2017-2020 Jan Grzegorzewski & Matthias König; https://livermetabolism.com. diff --git a/backend/pkdb_app/_version.py b/backend/pkdb_app/_version.py index ec4dbea7..9f65c118 100644 --- a/backend/pkdb_app/_version.py +++ b/backend/pkdb_app/_version.py @@ -1,4 +1,4 @@ """ Definition of version string. """ -__version__ = "0.9.3" +__version__ = "0.9.4" diff --git a/frontend/src/components/Home.vue b/frontend/src/components/Home.vue index 334678ff..b6259c74 100644 --- a/frontend/src/components/Home.vue +++ b/frontend/src/components/Home.vue @@ -15,6 +15,26 @@ pharmacokinetics data enriched with the required meta-information for computational modeling and data integration.

+ [Vue template markup lost in extraction; the added block renders a citation panel with icon "fas fa-file-alt" and the following text:]
+ PK-DB: pharmacokinetics database for individualized and stratified computational modeling
+ Grzegorzewski J, Brandhorst J, Green K, Eleftheriadou D, Duport Y, Barthorscht F, Köller A, Ke DYJ, De Angelis S, König M.
+ Nucleic Acids Res. 2020 Nov 5:gkaa990. doi: 10.1093/nar/gkaa990. Epub ahead of print. PMID: 33151297

Data

diff --git a/frontend/src/components/home/About.vue b/frontend/src/components/home/About.vue index 52b95a5d..676b4a8b 100644 --- a/frontend/src/components/home/About.vue +++ b/frontend/src/components/home/About.vue @@ -34,9 +34,10 @@

How to cite

- PK-DB: PharmacoKinetics DataBase for Individualized and Stratified Computational Modeling
- Jan Grzegorzewski, Janosch Brandhorst, Dimitra Eleftheriadou, Kathleen Green, Matthias König
+ PK-DB: pharmacokinetics database for individualized and stratified computational modeling
+ Grzegorzewski J, Brandhorst J, Green K, Eleftheriadou D, Duport Y, Barthorscht F, Köller A, Ke DYJ, De Angelis S, König M.
bioRxiv 760884; doi: https://doi.org/10.1101/760884
+ Nucleic Acids Res. 2020 Nov 5:gkaa990. doi: 10.1093/nar/gkaa990. Epub ahead of print. PMID: 33151297

Licensing

diff --git a/release-notes/0.9.4.md b/release-notes/0.9.4.md index 5c2b6ac1..64ec16be 100644 --- a/release-notes/0.9.4.md +++ b/release-notes/0.9.4.md @@ -1,9 +1,11 @@ # Release notes for pkdb 0.9.4 ## New features +- updated publication information ## Fixes +- multiple fixes in serializers +- bug fixes and speedup in the download - bugfix groups and individuals JSON button (#660) - security bugfix django (#665) -## Deprecated features
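For context on the download and speedup fixes above: the archive endpoint in backend/pkdb_app/studies/views.py renders each result table as an in-memory CSV and stores it in one zip archive. A minimal sketch of that flow (the function name and frame contents here are illustrative, not part of the codebase):

    import zipfile
    from io import StringIO

    import pandas as pd

    def write_csv_archive(frames, path):
        """Write one CSV per result table into a zip archive (sketch).

        Mirrors the pattern in studies/views.py: render each DataFrame
        to an in-memory buffer, then store it under '<name>.csv'.
        """
        with zipfile.ZipFile(path, "w", zipfile.ZIP_DEFLATED) as archive:
            for name, df in frames.items():
                buffer = StringIO()
                df.to_csv(buffer)
                archive.writestr(f"{name}.csv", buffer.getvalue())

    # e.g. write_csv_archive({"outputs": pd.DataFrame({"pk": [1, 2]})},
    #                        "pkdb_data.zip")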