diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43f6b38..a2bbf1d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,11 +41,11 @@ jobs: - name: Checkout Code Repository uses: actions/checkout@v4 - run: | - ENCODED=$(echo "${{ secrets.ENVS_FILE }}") - echo "Encoded length: ${#ENCODED}" - echo "$ENCODED" | base64 -d > envs.tar.gz - tar -xzvf envs.tar.gz + run: | + ENCODED=$(echo "${{ secrets.ENVS_FILE }}") + echo "Encoded length: ${#ENCODED}" + echo "$ENCODED" | base64 -d > envs.tar.gz + tar -xzvf envs.tar.gz - name: Build the Stack run: docker compose -f local.yml build diff --git a/config/api_router.py b/config/api_router.py index ec17b03..55aba90 100644 --- a/config/api_router.py +++ b/config/api_router.py @@ -13,6 +13,7 @@ GenomicFeatureViewSet, PromoterSetSigViewSet, PromoterSetViewSet, + RankResponseViewSet, RegulatorViewSet, ) from yeastregulatorydb.users.api.views import UserViewSet @@ -34,6 +35,7 @@ router.register("genomicfeature", GenomicFeatureViewSet) router.register("promotersetsig", PromoterSetSigViewSet) router.register("promoterset", PromoterSetViewSet) +router.register("rankresponse", RankResponseViewSet) router.register("regulator", RegulatorViewSet) diff --git a/setup.cfg b/setup.cfg index 2412f17..3693942 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ [flake8] max-line-length = 119 exclude = .tox,.git,*/migrations/*,*/static/CACHE/*,docs,node_modules,venv,.venv -extend-ignore = E203 +extend-ignore = E203,E731 [pycodestyle] max-line-length = 119 diff --git a/yeastregulatorydb/conftest.py b/yeastregulatorydb/conftest.py index 9a4b281..4aeded5 100644 --- a/yeastregulatorydb/conftest.py +++ b/yeastregulatorydb/conftest.py @@ -23,7 +23,6 @@ BindingFactory, BindingManualQCFactory, CallingCardsBackgroundFactory, - ChrMapFactory, DataSourceFactory, ExpressionFactory, ExpressionManualQCFactory, @@ -31,6 +30,7 @@ GenomicFeatureFactory, PromoterSetFactory, PromoterSetSigFactory, + RankResponseFactory, 
RegulatorFactory, ) from yeastregulatorydb.users.models import User @@ -156,8 +156,16 @@ def datasource(db) -> DataSource: def fileformat(db) -> QuerySet: # harb, hu both csvs format_dict = { - "array": ({"gene_id": "int", "effect": "float", "pval": "float"}, ",", "effect", "pval"), - "qbed": ({"chr": "str", "start": "int", "end": "int", "depth": "int", "strand": "str"}, "\t", "none", "none"), + "array": ({"gene_id": "int", "effect": "float", "pval": "float"}, ",", "effect", 0.0, "pval", 1.0, "none"), + "qbed": ( + {"chr": "str", "start": "int", "end": "int", "depth": "int", "strand": "str"}, + "\t", + "none", + 0.0, + "none", + 1.0, + "none", + ), "chipexo_allevents": ( { "chr": "str", @@ -170,7 +178,10 @@ def fileformat(db) -> QuerySet: }, ",", "YPD_log2Fold", + 0.0, "YPD_log2P", + 1.0, + "none", ), "chipexo_promoter_sig": ( { @@ -188,6 +199,7 @@ def fileformat(db) -> QuerySet: 0.0, "min_pval", 1.0, + "name", ), "cc_promoter_sig": ( { @@ -209,6 +221,7 @@ def fileformat(db) -> QuerySet: 0.0, "poisson_pval", 1.0, + "name", ), "kemmeren": ( {"gene_id": "int", "M": "float", "Madj": "float", "A": "float", "pval": "float"}, @@ -217,6 +230,7 @@ def fileformat(db) -> QuerySet: 0.0, "pval", 1.0, + "gene_id", ), "mcisaac": ( { @@ -233,6 +247,7 @@ def fileformat(db) -> QuerySet: 0.0, "none", 1.0, + "gene_id", ), "bed6": ( {"chr": "str", "start": "int", "end": "int", "name": "str", "score": "float", "strand": "str"}, @@ -241,16 +256,19 @@ def fileformat(db) -> QuerySet: 0.0, "none", 1.0, + "name", ), - "rank_response_summary": ( + "rankresponse": ( { "feature": "str", - "expression_effect": "int", - "expression_pvalue": "int", - "binding_effect": "str", - "binding_pvalue": "str", + "expression_effect": "float", + "expression_pvalue": "float", + "expression_source": "str", + "binding_effect": "float", + "binding_pvalue": "float", + "binding_source": "str", "responsive": "int", - "ran_bin": "float", + "rank_bin": "int", "random": "float", }, ",", @@ -258,10 +276,11 @@ def 
fileformat(db) -> QuerySet: 0.0, "none", 1.0, + "feature", ), } for key, value in format_dict.items(): - fields, separator, effect, effect_thres, pval, pval_thres = value + fields, separator, effect, effect_thres, pval, pval_thres, feature_identifier_col = value FileFormatFactory.create( fileformat=key, fields=fields, @@ -270,6 +289,7 @@ def fileformat(db) -> QuerySet: default_effect_threshold=effect_thres, pval_col=pval, default_pvalue_threshold=pval_thres, + feature_identifier_col=feature_identifier_col, ) return FileFormat.objects.all() @@ -318,6 +338,7 @@ def harbison_datasource(db, fileformat: QuerySet) -> DataSource: def hu_datasource(db, fileformat: QuerySet) -> DataSource: array = fileformat.filter(fileformat="array").first() content = { + "id": 101, "name": "hu_reimann_tfko", "fileformat": array, "lab": "hu", @@ -344,12 +365,14 @@ def kemmeren_datasource(db, fileformat: QuerySet) -> DataSource: def mcisaac_datasource(db, fileformat: QuerySet) -> DataSource: mcisaac = fileformat.filter(fileformat="mcisaac").first() content = { + "id": 102, "name": "mcisaac_oe", "fileformat": mcisaac, "lab": "mcisaac", "assay": "overexpression", "workflow": "none", } + return DataSourceFactory(**content) @pytest.fixture @@ -384,3 +407,11 @@ def promotersetsig(db) -> PromoterSetSig: def regulator(db) -> Regulator: hap5_genomic_feature = GenomicFeatureFactory(locus_tag="YOR358W", symbol="HAP5") return RegulatorFactory(id=1, regulator=hap5_genomic_feature) + + +@pytest.fixture +def rankresponse(db, regulator: Regulator) -> QuerySet: + binding = BindingFactory(regulator=regulator) + expression = ExpressionFactory(regulator=regulator) + promotersetsig = PromoterSetSigFactory(binding=binding) + return RankResponseFactory(promotersetsig=promotersetsig, expression=expression) diff --git a/yeastregulatorydb/regulatory_data/api/filters/RankResponseFilter.py b/yeastregulatorydb/regulatory_data/api/filters/RankResponseFilter.py index 870db02..a6071f2 100644 --- 
a/yeastregulatorydb/regulatory_data/api/filters/RankResponseFilter.py +++ b/yeastregulatorydb/regulatory_data/api/filters/RankResponseFilter.py @@ -13,12 +13,15 @@ class RankResponseFilter(django_filters.FilterSet): expression_id = django_filters.NumberFilter(field_name="expression__id") expression_source = django_filters.CharFilter(field_name="expression__source__name", lookup_expr="iexact") regulator_locus_tag = django_filters.CharFilter( - field_name="expression__regulator__locus_tag", lookup_expr="iexact" + field_name="expression__regulator__regulator__locus_tag", lookup_expr="iexact" + ) + regulator_symbol = django_filters.CharFilter( + field_name="expression__regulator__regulator__symbol", lookup_expr="iexact" ) - regulator_symbol = django_filters.CharFilter(field_name="expression__regulator__symbol", lookup_expr="iexact") expression_effect_threshold = django_filters.NumberFilter() expression_pvalue_threshold = django_filters.NumberFilter() normalized = django_filters.BooleanFilter() + significant_response = django_filters.BooleanFilter() class Meta: model = RankResponse @@ -34,4 +37,5 @@ class Meta: "expression_effect_threshold", "expression_pvalue_threshold", "normalized", + "significant_response", ] diff --git a/yeastregulatorydb/regulatory_data/api/filters/__init__.py b/yeastregulatorydb/regulatory_data/api/filters/__init__.py index 08c1969..c647afa 100644 --- a/yeastregulatorydb/regulatory_data/api/filters/__init__.py +++ b/yeastregulatorydb/regulatory_data/api/filters/__init__.py @@ -8,4 +8,20 @@ from .GenomicFeatureFilter import GenomicFeatureFilter from .PromoterSetFilter import PromoterSetFilter from .PromoterSetSigFilter import PromoterSetSigFilter +from .RankResponseFilter import RankResponseFilter from .RegulatorFilter import RegulatorFilter + +__all__ = [ + "BindingFilter", + "BindingManualQCFilter", + "CallingCardsBackgroundFilter", + "DataSourceFilter", + "ExpressionFilter", + "ExpressionManualQCFilter", + "FileFormatFilter", + 
"GenomicFeatureFilter", + "PromoterSetFilter", + "PromoterSetSigFilter", + "RankResponseFilter", + "RegulatorFilter", +] diff --git a/yeastregulatorydb/regulatory_data/api/serializers/PromoterSetSerializer.py b/yeastregulatorydb/regulatory_data/api/serializers/PromoterSetSerializer.py index 1240d8c..9f25046 100644 --- a/yeastregulatorydb/regulatory_data/api/serializers/PromoterSetSerializer.py +++ b/yeastregulatorydb/regulatory_data/api/serializers/PromoterSetSerializer.py @@ -1,5 +1,3 @@ -import pandas as pd -from django.conf import settings from rest_framework import serializers from ...models.PromoterSet import PromoterSet diff --git a/yeastregulatorydb/regulatory_data/api/serializers/__init__.py b/yeastregulatorydb/regulatory_data/api/serializers/__init__.py index ab016c4..ad8d2c8 100644 --- a/yeastregulatorydb/regulatory_data/api/serializers/__init__.py +++ b/yeastregulatorydb/regulatory_data/api/serializers/__init__.py @@ -24,5 +24,6 @@ "GenomicFeatureSerializer", "PromoterSetSerializer", "PromoterSetSigSerializer", - "RankResponseSerializer" "RegulatorSerializer", + "RankResponseSerializer", + "RegulatorSerializer", ] diff --git a/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py b/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py index c5256df..a3dbaf6 100644 --- a/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py @@ -34,9 +34,9 @@ def perform_create(self, serializer): task_type = "callingcards_promoter_sig" if task_type: - lock_id = f"add_data_lock" - acquire_lock = lambda: cache.add(lock_id, True, timeout=60 * 60) - release_lock = lambda: cache.delete(lock_id) + lock_id = "add_data_lock" + acquire_lock = lambda: cache.add(lock_id, True, timeout=60 * 60) # flake8: noqa: E731 + release_lock = lambda: cache.delete(lock_id) # flake8: noqa: E731 if acquire_lock(): try: diff --git a/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py 
b/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py index 55c88bd..8810195 100644 --- a/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py @@ -1,4 +1,3 @@ -from celery import chain from django.core.cache import cache from django_filters.rest_framework import DjangoFilterBackend from rest_framework import viewsets @@ -32,9 +31,9 @@ def perform_create(self, serializer): ).values_list("id", flat=True) # Create a chain of tasks for each promotersetsig_id - lock_id = f"add_data_lock" - acquire_lock = lambda: cache.add(lock_id, True, timeout=60 * 60) - release_lock = lambda: cache.delete(lock_id) + lock_id = "add_data_lock" + acquire_lock = lambda: cache.add(lock_id, True, timeout=60 * 60) # flake8: noqa: E731 + release_lock = lambda: cache.delete(lock_id) # flake8: noqa: E731 if acquire_lock(): try: diff --git a/yeastregulatorydb/regulatory_data/api/views/PromoterSetViewSet.py b/yeastregulatorydb/regulatory_data/api/views/PromoterSetViewSet.py index 2c5a90a..9bbe553 100644 --- a/yeastregulatorydb/regulatory_data/api/views/PromoterSetViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/PromoterSetViewSet.py @@ -28,7 +28,7 @@ class PromoterSetViewSet(UpdateModifiedMixin, viewsets.ModelViewSet): def perform_create(self, serializer): instance = serializer.save() - lock_id = f"add_data_lock" + lock_id = "add_data_lock" acquire_lock = lambda: cache.add(lock_id, True, timeout=60 * 60) release_lock = lambda: cache.delete(lock_id) diff --git a/yeastregulatorydb/regulatory_data/api/views/__init__.py b/yeastregulatorydb/regulatory_data/api/views/__init__.py index 7d76f1b..71b9b00 100644 --- a/yeastregulatorydb/regulatory_data/api/views/__init__.py +++ b/yeastregulatorydb/regulatory_data/api/views/__init__.py @@ -9,6 +9,7 @@ from .GenomicFeatureViewSet import GenomicFeatureViewSet from .PromoterSetSigViewSet import PromoterSetSigViewSet from .PromoterSetViewSet import 
PromoterSetViewSet +from .RankResponseViewSet import RankResponseViewSet from .RegulatorViewSet import RegulatorViewSet __all__ = [ @@ -23,5 +24,6 @@ "GenomicFeatureViewSet", "PromoterSetSigViewSet", "PromoterSetViewSet", + "RankResponseViewSet", "RegulatorViewSet", ] diff --git a/yeastregulatorydb/regulatory_data/api/views/mixins/BulkUploadMixin.py b/yeastregulatorydb/regulatory_data/api/views/mixins/BulkUploadMixin.py index 69fbdca..b649699 100644 --- a/yeastregulatorydb/regulatory_data/api/views/mixins/BulkUploadMixin.py +++ b/yeastregulatorydb/regulatory_data/api/views/mixins/BulkUploadMixin.py @@ -17,7 +17,7 @@ def bulk_upload(self, request): file_mapping = {file.name: file for file in files} df = pd.read_csv(csv_file) - if not "file" in df.columns: + if "file" not in df.columns: return Response("Column 'file' not found in CSV", status=status.HTTP_400_BAD_REQUEST) serializer_list = [] diff --git a/yeastregulatorydb/regulatory_data/forms/__init__.py b/yeastregulatorydb/regulatory_data/forms/__init__.py index 315f2a6..9db51af 100644 --- a/yeastregulatorydb/regulatory_data/forms/__init__.py +++ b/yeastregulatorydb/regulatory_data/forms/__init__.py @@ -1 +1,5 @@ from .BulkUploadForm import BulkUploadForm + +__all__ = [ + "BulkUploadForm", +] diff --git a/yeastregulatorydb/regulatory_data/migrations/0004_fileformat_default_effect_threshold_and_more.py b/yeastregulatorydb/regulatory_data/migrations/0004_fileformat_default_effect_threshold_and_more.py new file mode 100644 index 0000000..45dac8b --- /dev/null +++ b/yeastregulatorydb/regulatory_data/migrations/0004_fileformat_default_effect_threshold_and_more.py @@ -0,0 +1,122 @@ +# Generated by Django 4.2.8 on 2023-12-19 15:41 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import yeastregulatorydb.regulatory_data.models.mixins.GzipFileUploadWithIdMixin + + +class Migration(migrations.Migration): + dependencies = [ + 
migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("regulatory_data", "0003_datasource_name_alter_binding_regulator_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="fileformat", + name="default_effect_threshold", + field=models.FloatField( + default=0.0, help_text="The default threshold for the effect column. Defaults to 0.0." + ), + ), + migrations.AddField( + model_name="fileformat", + name="default_pvalue_threshold", + field=models.FloatField( + default=1.0, help_text="The default threshold for the p-value column. Defaults to 1.0." + ), + ), + migrations.AddField( + model_name="fileformat", + name="feature_identifier_col", + field=models.CharField( + default="none", + help_text="The name of the column that should be used as the default feature identifier column. Eg 'name'. Defaults to 'none'.", + max_length=40, + ), + ), + migrations.CreateModel( + name="RankResponse", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("upload_date", models.DateField(auto_now_add=True)), + ("modified_date", models.DateTimeField(auto_now=True)), + ( + "expression_effect_threshold", + models.FloatField( + default=0, + help_text="The threshold (absolute value) at which to label a gene as 'responsive' in the expression data. Works in conjunction with `expression_pvalue_threshold'. Default is 0", + ), + ), + ( + "expression_pvalue_threshold", + models.FloatField( + default=1, + help_text="The threshold at which to label a gene as 'responsive' in the expression data. Works in conjunction with `expression_effect_threshold`. Default is 1", + ), + ), + ( + "normalized", + models.BooleanField( + default=False, + help_text="This indicates whether the data has been normalized to have the same number of responsive genes across expression data sets. Default is False. 
WARNING: not yet implemented -- all are `False`", + ), + ), + ( + "file", + models.FileField( + help_text="A file which stores the rank response data for a given binding and expression set for a given regulator at specific expression effect and pvalue thresholds", + upload_to="temp", + ), + ), + ( + "expression", + models.ForeignKey( + help_text="foreign key to the 'Expression' table", + on_delete=django.db.models.deletion.CASCADE, + to="regulatory_data.expression", + ), + ), + ( + "fileformat", + models.ForeignKey( + help_text="foreign key to the 'FileFormat' table", + on_delete=django.db.models.deletion.CASCADE, + to="regulatory_data.fileformat", + ), + ), + ( + "modifier", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="%(class)s_modifier", + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "promotersetsig", + models.ForeignKey( + help_text="foreign key to the 'PromoterSetSig' table", + on_delete=django.db.models.deletion.CASCADE, + to="regulatory_data.promotersetsig", + ), + ), + ( + "uploader", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="%(class)s_uploader", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "db_table": "rankresponse", + }, + bases=( + models.Model, + yeastregulatorydb.regulatory_data.models.mixins.GzipFileUploadWithIdMixin.GzipFileUploadWithIdMixin, + ), + ), + ] diff --git a/yeastregulatorydb/regulatory_data/migrations/0005_rankresponse_significant_response.py b/yeastregulatorydb/regulatory_data/migrations/0005_rankresponse_significant_response.py new file mode 100644 index 0000000..e174c75 --- /dev/null +++ b/yeastregulatorydb/regulatory_data/migrations/0005_rankresponse_significant_response.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.8 on 2023-12-20 20:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("regulatory_data", "0004_fileformat_default_effect_threshold_and_more"), + ] + + operations = [ 
+ migrations.AddField( + model_name="rankresponse", + name="significant_response", + field=models.BooleanField( + default=False, + help_text="This field is used to indicate whether there are any bins in the top 250 genes with a confidence interval that does not include 0", + ), + ), + ] diff --git a/yeastregulatorydb/regulatory_data/models/BaseModel.py b/yeastregulatorydb/regulatory_data/models/BaseModel.py index be8f762..15202b2 100644 --- a/yeastregulatorydb/regulatory_data/models/BaseModel.py +++ b/yeastregulatorydb/regulatory_data/models/BaseModel.py @@ -1,8 +1,5 @@ from django.conf import settings from django.db import models -from django.db.models.signals import pre_save -from django.dispatch import receiver -from django.utils import timezone class BaseModel(models.Model): diff --git a/yeastregulatorydb/regulatory_data/models/Binding.py b/yeastregulatorydb/regulatory_data/models/Binding.py index 00019ac..f3fb806 100644 --- a/yeastregulatorydb/regulatory_data/models/Binding.py +++ b/yeastregulatorydb/regulatory_data/models/Binding.py @@ -68,7 +68,7 @@ def save(self, *args, **kwargs): # Store the old file path old_file_name = self.file.name if self.file else None super().save(*args, **kwargs) - self.update_file_name("file", f"binding/{self.source}") + self.update_file_name("file", f"binding/{self.source.name}") new_file_name = self.file.name super().save(update_fields=["file"]) # If the file name changed, delete the old file diff --git a/yeastregulatorydb/regulatory_data/models/Expression.py b/yeastregulatorydb/regulatory_data/models/Expression.py index e11a8bc..718bf93 100644 --- a/yeastregulatorydb/regulatory_data/models/Expression.py +++ b/yeastregulatorydb/regulatory_data/models/Expression.py @@ -59,7 +59,7 @@ def save(self, *args, **kwargs): # Store the old file path old_file_name = self.file.name if self.file else None super().save(*args, **kwargs) - self.update_file_name("file", f"expression/{self.source}", "tsv.gz") + self.update_file_name("file", 
f"expression/{self.source.name}", "tsv.gz") new_file_name = self.file.name super().save(update_fields=["file"]) # If the file name changed, delete the old file diff --git a/yeastregulatorydb/regulatory_data/models/FileFormat.py b/yeastregulatorydb/regulatory_data/models/FileFormat.py index 541bff3..309681a 100644 --- a/yeastregulatorydb/regulatory_data/models/FileFormat.py +++ b/yeastregulatorydb/regulatory_data/models/FileFormat.py @@ -1,8 +1,6 @@ import logging from django.db import models -from django.db.models.signals import pre_save -from django.dispatch import receiver from .BaseModel import BaseModel diff --git a/yeastregulatorydb/regulatory_data/models/GenomicFeature.py b/yeastregulatorydb/regulatory_data/models/GenomicFeature.py index da56ead..c5d77ef 100644 --- a/yeastregulatorydb/regulatory_data/models/GenomicFeature.py +++ b/yeastregulatorydb/regulatory_data/models/GenomicFeature.py @@ -160,6 +160,4 @@ class Meta: name="start_cannot_be_less_than_one", ), ] - indexes = [ - models.Index("chr", "start", "end", "strand", name="coord_index") - ] + indexes = [models.Index("chr", "start", "end", "strand", name="coord_index")] diff --git a/yeastregulatorydb/regulatory_data/models/RankResponse.py b/yeastregulatorydb/regulatory_data/models/RankResponse.py index 78d486f..b56c60a 100644 --- a/yeastregulatorydb/regulatory_data/models/RankResponse.py +++ b/yeastregulatorydb/regulatory_data/models/RankResponse.py @@ -49,6 +49,11 @@ class RankResponse(BaseModel, GzipFileUploadWithIdMixin): "binding and expression set for a given regulator at specific " "expression effect and pvalue thresholds", ) + significant_response = models.BooleanField( + help_text="This field is used to indicate whether there are any bins " + "in the top 250 genes with a confidence interval that does not include 0", + default=False, + ) def __str__(self): return f"pk:{self.pk};promotersetsig:{self.binding};expression:{self.expression}" diff --git 
a/yeastregulatorydb/regulatory_data/tasks/__init__.py b/yeastregulatorydb/regulatory_data/tasks/__init__.py index c9d0034..2b8f58a 100644 --- a/yeastregulatorydb/regulatory_data/tasks/__init__.py +++ b/yeastregulatorydb/regulatory_data/tasks/__init__.py @@ -1,2 +1,4 @@ from .promoter_significance_task import promoter_significance_task from .rank_response_task import rank_response_task, rank_response_tasks + +__all__ = ["promoter_significance_task", "rank_response_task", "rank_response_tasks"] diff --git a/yeastregulatorydb/regulatory_data/tasks/rank_response_task.py b/yeastregulatorydb/regulatory_data/tasks/rank_response_task.py index cc60d7a..70074f9 100644 --- a/yeastregulatorydb/regulatory_data/tasks/rank_response_task.py +++ b/yeastregulatorydb/regulatory_data/tasks/rank_response_task.py @@ -3,13 +3,15 @@ import logging import tempfile import uuid +from types import SimpleNamespace from callingcardstools.Analysis.yeast import rank_response from django.contrib.auth import get_user_model from django.core.files import File from config import celery_app -from yeastregulatorydb.regulatory_data.models import Expression, FileFormat, PromoterSetSig, RankResponse +from yeastregulatorydb.regulatory_data.api.serializers import RankResponseSerializer +from yeastregulatorydb.regulatory_data.models import Expression, FileFormat, PromoterSetSig from yeastregulatorydb.regulatory_data.utils.extract_file_from_storage import extract_file_from_storage logger = logging.getLogger(__name__) @@ -39,16 +41,9 @@ def rank_response_task( raise ValueError(f"Binding record with id {promotersetsig_id} does not exist") try: - rankresponse_summary_fileformat_record = FileFormat.objects.get(fileformat="rank_repsonse_summary") + rankresponse_format = FileFormat.objects.get(fileformat="rankresponse") except FileFormat.DoesNotExist: - raise ValueError(f"FileFormat 'rank_response_summary' does not exist") - - try: - binding_expression_annotated_fileformat_record = FileFormat.objects.get( - 
fileformat="binding_expression_annotated" - ) - except FileFormat.DoesNotExist: - raise ValueError(f"FileFormat 'binding_expression_annotated' does not exist") + raise ValueError("FileFormat 'rank_response_summary' does not exist") with tempfile.TemporaryDirectory() as tmpdir: promotersetsig_filepath = extract_file_from_storage(promotersetsig_record.file, tmpdir) @@ -63,11 +58,16 @@ def rank_response_task( for record in expression_objects_iterator: expression_filepath = extract_file_from_storage(record.file, tmpdir) + expr_pval_thres = kwargs.get( + "expression_pvalue_threshold", record.source.fileformat.default_pvalue_threshold + ) + expr_pval_thres = None if expr_pval_thres == 1.0 else expr_pval_thres + config_dict = { "binding_data_path": promotersetsig_filepath, - "binding_identifier_col": promotersetsig_record.binding.source.fileformat.feature_identifier_col, - "binding_effect_col": promotersetsig_record.binding.source.fileformat, - "binding_pvalue_col": promotersetsig_record.binding.source.fileformat, + "binding_identifier_col": promotersetsig_record.fileformat.feature_identifier_col, + "binding_effect_col": promotersetsig_record.fileformat.effect_col, + "binding_pvalue_col": promotersetsig_record.fileformat.pval_col, "binding_source": promotersetsig_record.binding.source.name, "expression_data_path": expression_filepath, "expression_identifier_col": record.source.fileformat.feature_identifier_col, @@ -77,9 +77,7 @@ def rank_response_task( "expression_effect_thres": kwargs.get( "expression_effect_threshold", record.source.fileformat.default_effect_threshold ), - "expression_effect_thres": kwargs.get( - "expression_pvalue_threshold", record.source.fileformat.default_pvalue_threshold - ), + "expression_pvalue_thres": expr_pval_thres, "normalize": kwargs.get("normalize", False), "rank_bin_size": kwargs.get("rank_bin_size", 5), } @@ -126,7 +124,7 @@ def rank_response_task( raise ValueError(error_message) try: - binding_expr_annotated_df, random_df, 
rank_response_df = rank_response.rank_response_ratio_summarize( + binding_expr_annotated_df, _, rank_response_summary_df = rank_response.rank_response_ratio_summarize( df, effect_expression_thres=args["expression_effect_thres"], p_expression_thres=args["expression_pvalue_thres"], @@ -137,44 +135,47 @@ def rank_response_task( logger.error("Error summarizing data: %s", exc) raise - results_list.append((record, binding_expr_annotated_df)) + results_list.append((record, binding_expr_annotated_df, rank_response_summary_df)) output_list = [] for result_tuple in results_list: # extract the dataframes from the output tuple - expression_record, binding_expr_annotated_df = result_tuple + expression_record, binding_expr_annotated_df, rank_response_summary_df = result_tuple # create a buffer to store the dataframe binding_expr_annotated_buffer = io.BytesIO() with gzip.GzipFile(fileobj=binding_expr_annotated_buffer, mode="wb") as gzipped_file: - binding_expr_annotated_df.df.to_csv(gzipped_file, index=False) + binding_expr_annotated_df.to_csv(gzipped_file, index=False) # Reset buffer position binding_expr_annotated_buffer.seek(0) # Create a Django File object with a uuid filename binding_expr_annotated_file = File(binding_expr_annotated_buffer, name=f"{uuid.uuid4()}.csv.gz") - rank_response_buffer = io.BytesIO() - with gzip.GzipFile(fileobj=rank_response_buffer, mode="wb") as gzipped_file: - rank_response_df.df.to_csv(gzipped_file, index=False) - # Reset buffer position - rank_response_buffer.seek(0) - # Create a Django File object with a uuid filename - rank_response_file = File(rank_response_buffer, name=f"{uuid.uuid4()}.csv.gz") - - rankresponse_record = RankResponse.objects.create( - user=user, - promotersetsig=promotersetsig_record, - expression=expression_record, - fileformat=rank_response_file, - file=binding_expr_annotated_file, + # if there is any value in ci_lower greater than 0 in the first + # 100 rows, then the rank response test passes + rank_response_pass = 
rank_response_summary_df["ci_lower"].head(50).gt(0).any() + + upload_data = { + "promotersetsig": promotersetsig_record.pk, + "expression": expression_record.pk, + "fileformat": rankresponse_format.pk, + "file": binding_expr_annotated_file, + "significant_response": rank_response_pass, + } + + # Create a mock request with only a user attribute + # Assuming you have the user_id available + mock_request = SimpleNamespace(user=user) + # serialize the PromoterSetSig object + serializer = RankResponseSerializer( + data=upload_data, + context={"request": mock_request}, ) - # serialize the PromoterSetSig object - serializer = RankResponse(rankresponse_record) # validate the serializer serializer.is_valid(raise_exception=True) # save the serializer - serializer.save() + instance = serializer.save() # add the id to the output list - output_list.append(rankresponse_record.id) + output_list.append(instance.id) return output_list diff --git a/yeastregulatorydb/regulatory_data/tests/factories.py b/yeastregulatorydb/regulatory_data/tests/factories.py index 0efcecb..54abb73 100644 --- a/yeastregulatorydb/regulatory_data/tests/factories.py +++ b/yeastregulatorydb/regulatory_data/tests/factories.py @@ -1,5 +1,5 @@ import faker -from factory import Faker, LazyFunction, Sequence, SubFactory +from factory import Faker, LazyFunction, SubFactory from factory.django import DjangoModelFactory, FileField from yeastregulatorydb.users.tests.factories import UserFactory @@ -16,6 +16,7 @@ GenomicFeature, PromoterSet, PromoterSetSig, + RankResponse, Regulator, ) @@ -220,3 +221,20 @@ class PromoterSetSigFactory(DjangoModelFactory): class Meta: model = PromoterSetSig django_get_or_create = ["binding", "promoter", "background"] + + +class RankResponseFactory(DjangoModelFactory): + uploader = SubFactory(UserFactory) + modifier = SubFactory(UserFactory) + promotersetsig = SubFactory(PromoterSetSigFactory) + expression = SubFactory(ExpressionFactory) + expression_effect_threshold = 0.0 + 
expression_pvalue_threshold = 1.0 + fileformat = SubFactory(FileFormatFactory) + normalized = False + file = FileField(filename="rankresponse.csv.gz") + significant_response = Faker("pybool") + + class Meta: + model = RankResponse + django_get_or_create = ["promotersetsig", "expression"] diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/28366_yiming_promoter_sig.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/28366_yiming_promoter_sig.csv.gz index cfafe7b..3bf4e8c 100644 Binary files a/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/28366_yiming_promoter_sig.csv.gz and b/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/28366_yiming_promoter_sig.csv.gz differ diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/expression_bulk_upload.csv b/yeastregulatorydb/regulatory_data/tests/test_data/expression_bulk_upload.csv new file mode 100644 index 0000000..cb30e57 --- /dev/null +++ b/yeastregulatorydb/regulatory_data/tests/test_data/expression_bulk_upload.csv @@ -0,0 +1,3 @@ +regulator,source,source_orig_id,file,replicate +1,102,,hap5_15min_mcisaac_chr1.csv.gz,1 +2,101,,hap5_hu_chr1.csv.gz,1 diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/rank_response_config.json b/yeastregulatorydb/regulatory_data/tests/test_data/rank_response_config.json new file mode 100644 index 0000000..abb7ff0 --- /dev/null +++ b/yeastregulatorydb/regulatory_data/tests/test_data/rank_response_config.json @@ -0,0 +1 @@ +{"binding_data_path": "/home/oguzkhan/code/callingCardsTools/tests/test_data/yeast/Analysis/hap5_exprid_17_yiming_adh1_promoter_sig.csv.gz", "binding_source": "cc_17", "binding_identifier_col": "target_gene_id", "binding_effect_col": "callingcards_enrichment", "binding_pvalue_col": "poisson_pval", "rank_by_effect": false, "expression_data_path": "/home/oguzkhan/code/callingCardsTools/tests/test_data/yeast/Analysis/hap5_15min_mcisaac.csv.gz", "expression_source": 
"mcisaac_hap5_15", "expression_identifier_col": "gene_id", "expression_effect_col": "log2_shrunken_timecourses", "expression_effect_thres": 0.0, "expression_pvalue_col": null, "expression_pvalue_thres": null, "rank_bin_size": 5, "normalize": false, "output_file": "/tmp/pytest-of-oguzkhan/pytest-0/test_validate_config0/rank_response.csv", "compress": false} \ No newline at end of file diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/rankresponse/rank_response.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/rankresponse/rank_response.csv.gz new file mode 100644 index 0000000..45c9e1e Binary files /dev/null and b/yeastregulatorydb/regulatory_data/tests/test_data/rankresponse/rank_response.csv.gz differ diff --git a/yeastregulatorydb/regulatory_data/tests/test_filters.py b/yeastregulatorydb/regulatory_data/tests/test_filters.py index 5c450f2..70dfa4b 100644 --- a/yeastregulatorydb/regulatory_data/tests/test_filters.py +++ b/yeastregulatorydb/regulatory_data/tests/test_filters.py @@ -11,13 +11,13 @@ GenomicFeatureFilter, PromoterSetFilter, PromoterSetSigFilter, + RankResponseFilter, RegulatorFilter, ) from yeastregulatorydb.regulatory_data.models import ( Binding, BindingManualQC, CallingCardsBackground, - ChrMap, DataSource, Expression, ExpressionManualQC, @@ -25,6 +25,7 @@ GenomicFeature, PromoterSet, PromoterSetSig, + RankResponse, Regulator, ) @@ -40,6 +41,7 @@ GenomicFeatureFactory, PromoterSetFactory, PromoterSetSigFactory, + RankResponseFactory, RegulatorFactory, ) @@ -440,6 +442,47 @@ def test_promoter_set_sig_filter(): assert promoter_set_sig2 not in f.qs +@pytest.mark.django_db +def test_rankresponse_filter(): + # Create some RankResponse instances using the factory + genomicfeature1 = GenomicFeatureFactory(locus_tag="tag1", symbol="symbol1") + genomicfeature2 = GenomicFeatureFactory(locus_tag="tag2", symbol="symbol2") + regulator1 = RegulatorFactory(regulator=genomicfeature1) + regulator2 = RegulatorFactory(regulator=genomicfeature2) 
+ binding1 = BindingFactory(regulator=regulator1) + binding2 = BindingFactory(regulator=regulator2) + promotersetsig1 = PromoterSetSigFactory(binding=binding1) + promotersetsig2 = PromoterSetSigFactory(binding=binding2) + expression1 = ExpressionFactory(regulator=regulator1) + expression2 = ExpressionFactory(regulator=regulator2) + + rankresponse1 = RankResponseFactory( + id=1, + promotersetsig=promotersetsig1, + expression=expression1, + ) + rankresponse2 = RankResponseFactory( + id=2, + promotersetsig=promotersetsig2, + expression=expression2, + ) + + # Define the filter parameters and their expected values + filter_params = [ + {"id": 1}, + {"regulator_locus_tag": promotersetsig1.binding.regulator.regulator.locus_tag}, + {"regulator_symbol": promotersetsig1.binding.regulator.regulator.symbol}, + {"binding_source": promotersetsig1.binding.source.name}, + {"expression_source": expression1.source.name}, + ] + + # Apply each filter and check if it returns the expected RankResponse instances + for params in filter_params: + f = RankResponseFilter(params, queryset=RankResponse.objects.all()) + assert rankresponse1 in f.qs + assert rankresponse2 not in f.qs + + @pytest.mark.django_db def test_regulator_filter(): # Create some Regulator instances using the factory diff --git a/yeastregulatorydb/regulatory_data/tests/test_models.py b/yeastregulatorydb/regulatory_data/tests/test_models.py index 8b31b6c..1ae977d 100644 --- a/yeastregulatorydb/regulatory_data/tests/test_models.py +++ b/yeastregulatorydb/regulatory_data/tests/test_models.py @@ -13,17 +13,18 @@ GenomicFeature, PromoterSet, PromoterSetSig, + RankResponse, Regulator, ) def test_binding_get_absolute_url(binding: Binding): - assert reverse("api:binding-list") == f"/api/binding/" + assert reverse("api:binding-list") == "/api/binding/" assert reverse("api:binding-detail", args=[str(binding.id)]) == f"/api/binding/{binding.id}/" def test_bindingmanualqc_get_absolute_url(bindingmanualqc: BindingManualQC): - assert 
reverse("api:bindingmanualqc-list") == f"/api/bindingmanualqc/" + assert reverse("api:bindingmanualqc-list") == "/api/bindingmanualqc/" assert ( reverse("api:bindingmanualqc-detail", args=[str(bindingmanualqc.id)]) == f"/api/bindingmanualqc/{bindingmanualqc.id}/" @@ -31,12 +32,12 @@ def test_bindingmanualqc_get_absolute_url(bindingmanualqc: BindingManualQC): def test_datasource_get_absolute_url(datasource: DataSource): - assert reverse("api:datasource-list") == f"/api/datasource/" + assert reverse("api:datasource-list") == "/api/datasource/" assert reverse("api:datasource-detail", args=[str(datasource.id)]) == f"/api/datasource/{datasource.id}/" def test_callingcardsbackground_get_absolute_url(callingcardsbackground: CallingCardsBackground): - assert reverse("api:callingcardsbackground-list") == f"/api/callingcardsbackground/" + assert reverse("api:callingcardsbackground-list") == "/api/callingcardsbackground/" assert ( reverse("api:callingcardsbackground-detail", args=[str(callingcardsbackground.id)]) == f"/api/callingcardsbackground/{callingcardsbackground.id}/" @@ -44,18 +45,18 @@ def test_callingcardsbackground_get_absolute_url(callingcardsbackground: Calling def test_chrmap_get_absolute_url(chrmap: QuerySet): - assert reverse("api:chrmap-list") == f"/api/chrmap/" + assert reverse("api:chrmap-list") == "/api/chrmap/" chrmap_instance = ChrMap.objects.first() assert reverse("api:chrmap-detail", args=[str(chrmap_instance.id)]) == f"/api/chrmap/{chrmap_instance.id}/" def test_expression_get_absolute_url(expression: Expression): - assert reverse("api:expression-list") == f"/api/expression/" + assert reverse("api:expression-list") == "/api/expression/" assert reverse("api:expression-detail", args=[str(expression.id)]) == f"/api/expression/{expression.id}/" def test_expressionmanualqc_get_absolute_url(expressionmanualqc: ExpressionManualQC): - assert reverse("api:expressionmanualqc-list") == f"/api/expressionmanualqc/" + assert reverse("api:expressionmanualqc-list") 
== "/api/expressionmanualqc/" assert ( reverse("api:expressionmanualqc-detail", args=[str(expressionmanualqc.id)]) == f"/api/expressionmanualqc/{expressionmanualqc.id}/" @@ -63,13 +64,13 @@ def test_expressionmanualqc_get_absolute_url(expressionmanualqc: ExpressionManua def test_fileformat_get_absolute_url(fileformat: QuerySet): - assert reverse("api:fileformat-list") == f"/api/fileformat/" + assert reverse("api:fileformat-list") == "/api/fileformat/" bed6_id = FileFormat.objects.filter(fileformat="bed6").first().id assert reverse("api:fileformat-detail", args=[str(bed6_id)]) == f"/api/fileformat/{bed6_id}/" def test_genomicfeature_get_absolute_url(genomicfeature: GenomicFeature): - assert reverse("api:genomicfeature-list") == f"/api/genomicfeature/" + assert reverse("api:genomicfeature-list") == "/api/genomicfeature/" assert ( reverse("api:genomicfeature-detail", args=[str(genomicfeature.id)]) == f"/api/genomicfeature/{genomicfeature.id}/" @@ -77,12 +78,12 @@ def test_genomicfeature_get_absolute_url(genomicfeature: GenomicFeature): def test_promoterset_get_absolute_url(promoterset: PromoterSet): - assert reverse("api:promoterset-list") == f"/api/promoterset/" + assert reverse("api:promoterset-list") == "/api/promoterset/" assert reverse("api:promoterset-detail", args=[str(promoterset.id)]) == f"/api/promoterset/{promoterset.id}/" def test_promotersetsig_get_absolute_url(promotersetsig: PromoterSetSig): - assert reverse("api:promotersetsig-list") == f"/api/promotersetsig/" + assert reverse("api:promotersetsig-list") == "/api/promotersetsig/" assert ( reverse("api:promotersetsig-detail", args=[str(promotersetsig.id)]) == f"/api/promotersetsig/{promotersetsig.id}/" @@ -90,5 +91,11 @@ def test_promotersetsig_get_absolute_url(promotersetsig: PromoterSetSig): def test_regulator_get_absolute_url(regulator: Regulator): - assert reverse("api:regulator-list") == f"/api/regulator/" + assert reverse("api:regulator-list") == "/api/regulator/" assert 
reverse("api:regulator-detail", args=[str(regulator.id)]) == f"/api/regulator/{regulator.id}/" + + +def test_rank_response_absolute_url(rankresponse: RankResponse): + assert reverse("api:rankresponse-list") == "/api/rankresponse/" + assert reverse("api:rankresponse-detail", args=[str(rankresponse.id)]) == f"/api/rankresponse/{rankresponse.id}/" + assert reverse("api:rankresponse-summary") == "/api/rankresponse/summary/" diff --git a/yeastregulatorydb/regulatory_data/tests/test_serializers.py b/yeastregulatorydb/regulatory_data/tests/test_serializers.py index c278e08..3ac4e60 100644 --- a/yeastregulatorydb/regulatory_data/tests/test_serializers.py +++ b/yeastregulatorydb/regulatory_data/tests/test_serializers.py @@ -6,7 +6,7 @@ from rest_framework.exceptions import ValidationError from rest_framework.test import APIRequestFactory -from yeastregulatorydb.regulatory_data.models import ChrMap, DataSource, Regulator +from yeastregulatorydb.regulatory_data.models import DataSource, Expression, PromoterSetSig, Regulator from yeastregulatorydb.users.models import User from ..api.serializers import ( @@ -15,6 +15,7 @@ FileFormatSerializer, GenomicFeatureSerializer, PromoterSetSerializer, + RankResponseSerializer, ) from .factories import ( BindingFactory, @@ -23,6 +24,7 @@ FileFormatFactory, GenomicFeatureFactory, PromoterSetFactory, + RankResponseFactory, ) from .utils.model_to_dict_select import model_to_dict_select @@ -56,7 +58,7 @@ def test_BindingSerializerCC(user: User, chrmap: QuerySet, regulator: Regulator, serializer1 = BindingSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -85,7 +87,7 @@ def test_BindingSerializerChipExo(user: User, chrmap: QuerySet, regulator: Regul serializer1 = BindingSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert 
serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -116,7 +118,7 @@ def test_BindingSerializerHarbison( serializer1 = BindingSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -147,7 +149,7 @@ def test_ExpressionSerializerKemmeren( serializer1 = ExpressionSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -176,7 +178,7 @@ def test_ExpressionSerializerHu(user: User, chrmap: QuerySet, regulator: Regulat serializer1 = ExpressionSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -205,7 +207,7 @@ def test_ExpressionSerializerMcIsaac(user: User, chrmap: QuerySet, regulator: Re serializer1 = ExpressionSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -235,7 +237,7 @@ def test_fileformat_serializer(user: User): # Serialize the FileFormat instance with the request in the context serializer1 = FileFormatSerializer(data=fileformat1, context={"request": request}) # Check that the serializer is valid - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors @pytest.mark.django_db @@ -253,7 +255,7 @@ def test_genomic_feature_serializer(user: User): # Serialize the GenomicFeature instance with the request in the context serializer1 = GenomicFeatureSerializer(data=genomic_feature1, context={"request": request}) # Check that the serializer is valid - assert serializer1.is_valid() == True, serializer1.errors + assert 
serializer1.is_valid() is True, serializer1.errors # test that the serializer is invalid if start > end data.update({"start": 4, "end": 3}) @@ -307,4 +309,39 @@ def test_promoterset_serializer(tmpdir, user: User, chrmap: QuerySet): serializer1 = PromoterSetSerializer(data=data, context={"request": request}) - assert serializer1.is_valid() == True, serializer1.errors + assert serializer1.is_valid() is True, serializer1.errors + + +@pytest.mark.django_db +def test_rankresponse_serializer( + tmpdir, user: User, promotersetsig: PromoterSetSig, expression: Expression, fileformat: QuerySet +): + # Create a request instance + factory = APIRequestFactory() + request = factory.get("/") + # Authenticate the request + request.user = user + + # set path to test data and check that it exists + file_path = os.path.join(os.path.dirname(__file__), "test_data", "rankresponse/rank_response.csv.gz") + assert os.path.exists(file_path), f"path: {file_path}" + + rankresponse_fileformat = fileformat.get(fileformat="rankresponse") + + # Open the file and read its content + with open(file_path, "rb") as file_obj: + file_content = file_obj.read() + # Create a SimpleUploadedFile instance + uploaded_file = SimpleUploadedFile("rank_response.csv.gz", file_content, content_type="application/gzip") + + fields_dict = { + "file": uploaded_file, + "promotersetsig": promotersetsig, + "expression": expression, + "fileformat": rankresponse_fileformat, + } + data = model_to_dict_select(RankResponseFactory.build(**fields_dict)) + + serializer1 = RankResponseSerializer(data=data, context={"request": request}) + + assert serializer1.is_valid() is True, serializer1.errors diff --git a/yeastregulatorydb/regulatory_data/tests/test_tasks.py b/yeastregulatorydb/regulatory_data/tests/test_tasks.py index 25c91e4..7f5fea9 100644 --- a/yeastregulatorydb/regulatory_data/tests/test_tasks.py +++ b/yeastregulatorydb/regulatory_data/tests/test_tasks.py @@ -2,15 +2,25 @@ import pytest from celery.result import 
EagerResult -from django.conf import settings from django.core.files.uploadedfile import SimpleUploadedFile from django.db.models.query import QuerySet from rest_framework.test import APIRequestFactory -from yeastregulatorydb.regulatory_data.api.serializers import BindingSerializer, PromoterSetSerializer -from yeastregulatorydb.regulatory_data.models import DataSource, Regulator -from yeastregulatorydb.regulatory_data.tasks.promoter_significance_task import promoter_significance_task -from yeastregulatorydb.regulatory_data.tests.factories import BindingFactory, PromoterSetFactory +from yeastregulatorydb.regulatory_data.api.serializers import ( + BindingSerializer, + ExpressionSerializer, + PromoterSetSerializer, + PromoterSetSigSerializer, +) +from yeastregulatorydb.regulatory_data.models import DataSource, PromoterSet, Regulator +from yeastregulatorydb.regulatory_data.tasks import promoter_significance_task, rank_response_task +from yeastregulatorydb.regulatory_data.tests.factories import ( + BindingFactory, + BindingManualQCFactory, + ExpressionFactory, + PromoterSetFactory, + PromoterSetSigFactory, +) from yeastregulatorydb.regulatory_data.tests.utils.model_to_dict_select import model_to_dict_select from yeastregulatorydb.users.models import User @@ -38,7 +48,7 @@ def test_promoter_significance_task( upload_file = SimpleUploadedFile("yiming_promoters_chrI.bed.gz", file_content, content_type="application/gzip") data = model_to_dict_select(PromoterSetFactory.build(name="yiming", file=upload_file)) serializer = PromoterSetSerializer(data=data, context={"request": request}) - assert serializer.is_valid() == True, serializer.errors + assert serializer.is_valid() is True, serializer.errors serializer.save() # create the chipexo Binding record @@ -54,7 +64,7 @@ def test_promoter_significance_task( BindingFactory.build(source=chipexo_datasource, regulator=regulator, file=upload_file) ) serializer = BindingSerializer(data=data, context={"request": request}) - assert 
serializer.is_valid() == True, serializer.errors + assert serializer.is_valid() is True, serializer.errors instance = serializer.save() settings.CELERY_TASK_ALWAYS_EAGER = True task_result = promoter_significance_task.delay( @@ -64,48 +74,70 @@ def test_promoter_significance_task( assert isinstance(task_result.result, list) -# @pytest.mark.djanbo_db -# def test_rank_response_task( -# settings, chrmap: QuerySet, fileformat: QuerySet, chipexo_datasource: DataSource, regulator: Regulator, user: User -# ): -# """test promoter_significance_task task""" -# # Create a request object and set the user -# factory = APIRequestFactory() -# request = factory.get("/") -# request.user = user - -# # create the promoter set record -# promoterset_path = os.path.join(os.path.dirname(__file__), "test_data", "yiming_promoters_chrI.bed.gz") -# assert os.path.exists(promoterset_path), f"path: {promoterset_path}" - -# # Open the file and read its content -# with open(promoterset_path, "rb") as file_obj: -# file_content = file_obj.read() -# # Create a SimpleUploadedFile instance -# upload_file = SimpleUploadedFile("yiming_promoters_chrI.bed.gz", file_content, content_type="application/gzip") -# data = model_to_dict_select(PromoterSetFactory.build(name="yiming", file=upload_file)) -# serializer = PromoterSetSerializer(data=data, context={"request": request}) -# assert serializer.is_valid() == True, serializer.errors -# serializer.save() - -# # create the chipexo Binding record -# file_path = os.path.join(os.path.dirname(__file__), "test_data", "binding/chipexo/28366_chrI.csv.gz") -# assert os.path.exists(file_path), f"path: {file_path}" - -# # Open the file and read its content -# with open(file_path, "rb") as file_obj: -# file_content = file_obj.read() -# # Create a SimpleUploadedFile instance -# upload_file = SimpleUploadedFile("28366_chrI.csv.gz", file_content, content_type="application/gzip") -# data = model_to_dict_select( -# BindingFactory.build(source=chipexo_datasource, 
regulator=regulator, file=upload_file) -# ) -# serializer = BindingSerializer(data=data, context={"request": request}) -# assert serializer.is_valid() == True, serializer.errors -# instance = serializer.save() -# settings.CELERY_TASK_ALWAYS_EAGER = True -# task_result = promoter_significance_task.delay( -# instance.id, request.user.id, settings.CHIPEXO_PROMOTER_SIG_FORMAT -# ) -# assert isinstance(task_result, EagerResult) -# assert isinstance(task_result.result, list) +@pytest.mark.django_db +def test_rank_response_task( + settings, + chrmap: QuerySet, + fileformat: QuerySet, + promoterset: PromoterSet, + user: User, + regulator: Regulator, + chipexo_datasource: DataSource, + mcisaac_datasource: DataSource, +): + """test rank_response_task task""" + # Create a request object and set the user + factory = APIRequestFactory() + request = factory.get("/") + request.user = user + + # create the promoter set record + promotersetsig_path = os.path.join( + os.path.dirname(__file__), "test_data", "binding/chipexo/28366_yiming_promoter_sig.csv.gz" + ) + assert os.path.exists(promotersetsig_path), f"path: {promotersetsig_path}" + + expression_path = os.path.join( + os.path.dirname(__file__), "test_data", "expression/mcisaac/hap5_15min_mcisaac_chr1.csv.gz" + ) + assert os.path.exists(expression_path), f"path: {expression_path}" + + binding_record = BindingFactory.create(source=chipexo_datasource, regulator=regulator) + + BindingManualQCFactory.create(binding=binding_record) + + # Open the file and read its content + with open(promotersetsig_path, "rb") as promotersetsig_file_obj: + promotersetsig_file_content = promotersetsig_file_obj.read() + # Create a SimpleUploadedFile instance + promotersetsig_upload_file = SimpleUploadedFile( + "28366_yiming_promoter_sig.csv.gz", promotersetsig_file_content, content_type="application/gzip" + ) + promotersetsig_data = model_to_dict_select( + PromoterSetSigFactory.build( + file=promotersetsig_upload_file, + binding=binding_record,
+ promoter=promoterset, + fileformat=fileformat.get(fileformat="chipexo_promoter_sig"), + ) + ) + promotersetsig_serializer = PromoterSetSigSerializer(data=promotersetsig_data, context={"request": request}) + assert promotersetsig_serializer.is_valid() is True, promotersetsig_serializer.errors + promotersetsig_instance = promotersetsig_serializer.save() + + # Open the file and read its content + with open(expression_path, "rb") as file_obj: + file_content = file_obj.read() + # Create a SimpleUploadedFile instance + upload_file = SimpleUploadedFile("28366_chrI.csv.gz", file_content, content_type="application/gzip") + data = model_to_dict_select( + ExpressionFactory.build(source=mcisaac_datasource, regulator=binding_record.regulator, file=upload_file) + ) + serializer = ExpressionSerializer(data=data, context={"request": request}) + assert serializer.is_valid() is True, serializer.errors + serializer.save() + + settings.CELERY_TASK_ALWAYS_EAGER = True + task_result = rank_response_task.delay(promotersetsig_instance.id, request.user.id) + assert isinstance(task_result, EagerResult) + assert isinstance(task_result.result, list) diff --git a/yeastregulatorydb/regulatory_data/tests/test_utils.py b/yeastregulatorydb/regulatory_data/tests/test_utils.py index 5b617e4..19fae91 100644 --- a/yeastregulatorydb/regulatory_data/tests/test_utils.py +++ b/yeastregulatorydb/regulatory_data/tests/test_utils.py @@ -4,7 +4,6 @@ import pytest from django.db.models.query import QuerySet -from yeastregulatorydb.regulatory_data.models import ChrMap from yeastregulatorydb.regulatory_data.utils.count_hops import count_hops diff --git a/yeastregulatorydb/regulatory_data/tests/test_views.py b/yeastregulatorydb/regulatory_data/tests/test_views.py index 768c64b..3727c9a 100644 --- a/yeastregulatorydb/regulatory_data/tests/test_views.py +++ b/yeastregulatorydb/regulatory_data/tests/test_views.py @@ -4,7 +4,7 @@ import pytest from django.core.files.uploadedfile import SimpleUploadedFile from 
django.db.models.query import QuerySet -from django.test import Client, RequestFactory +from django.test import RequestFactory from django.urls import reverse from rest_framework.authtoken.models import Token from rest_framework.test import APIClient, force_authenticate @@ -12,8 +12,8 @@ from yeastregulatorydb.users.models import User from ..api.views import ChrMapViewSet, GenomicFeatureViewSet -from ..models import Binding, ChrMap, DataSource, Regulator -from .factories import BindingFactory +from ..models import Binding, ChrMap, DataSource, Expression, Regulator +from .factories import BindingFactory, ExpressionFactory, RegulatorFactory from .utils.model_to_dict_select import model_to_dict_select @@ -102,8 +102,7 @@ def test_single_binding_upload(cc_datasource: DataSource, regulator: Regulator, assert response.status_code == 201, response.data assert Binding.objects.count() == 1 assert ( - re.match(r"binding\/brent_nf_core_callingcards_dev\/\d+\.qbed\.gz$", Binding.objects.get().file.name) - is not None + re.match(r"binding\/brent_nf_cc\/\d+\.qbed\.gz$", Binding.objects.get().file.name) is not None ), Binding.objects.get().file.name @@ -147,75 +146,81 @@ def test_bulk_binding_upload(chipexo_datasource: DataSource, regulator: Regulato @pytest.mark.django_db -def test_expression_single_upload( - expression_datasource: DataSource, regulator: Regulator, chrmap: QuerySet, user: User -): +def test_expression_single_upload(mcisaac_datasource: DataSource, regulator: Regulator, chrmap: QuerySet, user: User): token = Token.objects.get(user=user) client = APIClient() client.credentials(HTTP_AUTHORIZATION="Token " + token.key) - data = model_to_dict_select(BindingFactory.build()) + data = model_to_dict_select(ExpressionFactory.build()) # set path to test data and check that it exists - file_path = os.path.join(os.path.dirname(__file__), "test_data", "expression/hap5_expr17_chr1_ucsc.bed.gz") + file_path = os.path.join( + os.path.dirname(__file__), "test_data", 
"expression/mcisaac/hap5_15min_mcisaac_chr1.csv.gz" + ) assert os.path.exists(file_path), f"path: {file_path}" # Open the file and read its content with open(file_path, "rb") as file_obj: file_content = file_obj.read() # Create a SimpleUploadedFile instance - upload_file = SimpleUploadedFile("hap5_expr17_chrI.bed.gz", file_content, content_type="application/gzip") + upload_file = SimpleUploadedFile( + "hap5_15min_mcisaac_chr1.csv.gz", file_content, content_type="application/gzip" + ) data["file"] = upload_file - data["source"] = expression_datasource.id + data["source"] = mcisaac_datasource.id data["regulator"] = regulator.id - response = client.post(reverse("api:binding-list"), data, format="multipart") + response = client.post(reverse("api:expression-list"), data, format="multipart") assert response.status_code == 201, response.data - assert Binding.objects.count() == 1 + assert Expression.objects.count() == 1 assert ( - re.match(r"binding\/brent_nf_core_expression_dev\/\d+\.bed\.gz$", Binding.objects.get().file.name) - is not None - ), Binding.objects.get().file.name + re.match(r"expression\/mcisaac_oe\/\d+\.csv\.gz$", Expression.objects.get().file.name) is not None + ), Expression.objects.get().file.name @pytest.mark.django_db -def test_expression_bulk_upload(expression_datasource: DataSource, regulator: Regulator, chrmap: QuerySet, user: User): +def test_expression_bulk_upload( + chrmap: QuerySet, hu_datasource: DataSource, mcisaac_datasource: DataSource, user: User +): token = Token.objects.get(user=user) client = APIClient() client.credentials(HTTP_AUTHORIZATION="Token " + token.key) + RegulatorFactory.create(id=1) + RegulatorFactory.create(id=2) + # set path to test data and check that it exists base_path = os.path.join(os.path.dirname(__file__), "test_data") assert os.path.exists(base_path), f"path: {base_path}" - csv_path = os.path.join(base_path, "binding_bulk_expression_upload.csv") + csv_path = os.path.join(base_path, "expression_bulk_upload.csv") 
assert os.path.exists(csv_path), f"path: {csv_path}" - expression_file1_path = os.path.join(base_path, "expression/hap5_expr17_chrI.bed.gz") - assert os.path.exists(expression_file1_path), f"path: {expression_file1_path}" + expression_filepath1 = os.path.join( + os.path.dirname(__file__), "test_data", "expression/mcisaac/hap5_15min_mcisaac_chr1.csv.gz" + ) + assert os.path.exists(expression_filepath1), f"path: {expression_filepath1}" - expression_file2_path = os.path.join(base_path, "expression/hap5_expr18_chrI.bed.gz") - assert os.path.exists(expression_file2_path), f"path: {expression_file2_path}" + expression_filepath2 = os.path.join(os.path.dirname(__file__), "test_data", "expression/hu/hap5_hu_chr1.csv.gz") + assert os.path.exists(expression_filepath2), f"path: {expression_filepath2}" csv_handle = open(csv_path, "rb") - expression_file1_handle = open(expression_file1_path, "rb") - expression_file2_handle = open(expression_file2_path, "rb") + expression_file_handle1 = open(expression_filepath1, "rb") + expression_file_handle2 = open(expression_filepath2, "rb") data = { "csv_file": SimpleUploadedFile("bulk_upload.csv", csv_handle.read(), content_type="text/csv"), "files": [ SimpleUploadedFile( - "hap5_expr17_chrI.bed.gz", expression_file1_handle.read(), content_type="application/gzip" - ), - SimpleUploadedFile( - "hap5_expr18_chrI.bed.gz", expression_file2_handle.read(), content_type="application/gzip" + "hap5_15min_mcisaac_chr1.csv.gz", expression_file_handle1.read(), content_type="application/gzip" ), + SimpleUploadedFile("hap5_hu_chr1.csv.gz", expression_file_handle2.read(), content_type="application/gzip"), ], } - response = client.post(reverse("api:binding-bulk-upload"), data, format="multipart") + response = client.post(reverse("api:expression-bulk-upload"), data, format="multipart") assert response.status_code == 201, response.data - assert Binding.objects.count() == 2, Binding.objects.count() + assert Expression.objects.count() == 2, 
Expression.objects.count() csv_handle.close() - expression_file1_handle.close() - expression_file2_handle.close() + expression_file_handle1.close() + expression_file_handle2.close() diff --git a/yeastregulatorydb/regulatory_data/utils/extract_file_from_storage.py b/yeastregulatorydb/regulatory_data/utils/extract_file_from_storage.py index e5e1fba..5e00ab3 100644 --- a/yeastregulatorydb/regulatory_data/utils/extract_file_from_storage.py +++ b/yeastregulatorydb/regulatory_data/utils/extract_file_from_storage.py @@ -1,7 +1,5 @@ import os -import shutil -import requests from django.conf import settings from django.core.files import File from django.core.files.storage import default_storage diff --git a/yeastregulatorydb/regulatory_data/utils/validate_df.py b/yeastregulatorydb/regulatory_data/utils/validate_df.py index 375f304..910e5e4 100644 --- a/yeastregulatorydb/regulatory_data/utils/validate_df.py +++ b/yeastregulatorydb/regulatory_data/utils/validate_df.py @@ -39,6 +39,14 @@ def validate_df( if not set(df[colname]).issubset(set(expected_type_or_levels)): raise ValueError(f"Column {colname} must be one of {expected_type_or_levels}") else: + # if the expected coltype is a string, try to cast all values to string + if expected_type_or_levels == str: + try: + df[colname] = df[colname].astype(str) + except ValueError: + raise ValueError( + f"Column {colname} is expected to be a str. It is not, and could not be cast to str. Fix it!" + ) if not all(isinstance(x, expected_type_or_levels) for x in df[colname]): raise ValueError(f"Column {colname} must be of type {expected_type_or_levels}") diff --git a/yeastregulatorydb/templates/binding_data/bulk_upload.html b/yeastregulatorydb/templates/binding_data/bulk_upload.html index 17ece76..a5d4d8f 100644 --- a/yeastregulatorydb/templates/binding_data/bulk_upload.html +++ b/yeastregulatorydb/templates/binding_data/bulk_upload.html @@ -1,21 +1,21 @@ - -
+ +