diff --git a/.gitignore b/.gitignore index efc99e0..a5c020e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,8 @@ -creds.json +# Creds +schematic_service_account_creds.json + +# Template files generated locally +*.schema.json + +# Ignore test logs +logs diff --git a/config.yml b/config.yml index 3523d9f..ab54131 100644 --- a/config.yml +++ b/config.yml @@ -1,26 +1,34 @@ -# Do not change the 'definitions' section unless you know what you're doing +# Do not change the "definitions" section unless you know what you're doing # Usage: schematic manifest -c config.yml get -dt ClinicalAssay -t "Test Manifest" --jsonld ./NF.jsonld -s definitions: synapse_config: ".synapseConfig" - creds_path: "credentials.json" - token_pickle: "token.pickle" - service_acct_creds: "creds.json" + service_acct_creds: "schematic_service_account_creds.json" synapse: - master_fileview: 'syn23643253' - manifest_folder: 'manifests' - manifest_filename: 'synapse_storage_manifest.csv' - token_creds: 'syn23643259' - service_acct_creds: 'syn25171627' + master_fileview: "syn28142805" + manifest_folder: "manifests" + manifest_basename: "synapse_storage_manifest" + manifest_filename: "synapse_storage_manifest.csv" manifest: - title: 'Genomics_Data_Manifest' - data_type: 'GenomicsAssay' + title: "Gray_Foundation_Manifest" + data_type: + + # Clinical Data + - "CohortCore" + - "CohortCoreRFC" + - "CohortCorePortal" + + # File Metadata + - "ImagingLevel2" + - "scATAC-seqLevel1" + - "scRNA-seqLevel1" + - "scRNA-seqLevel3" model: input: - location: '~/NF.jsonld' - file_type: 'local' + location: "./GF.jsonld" + file_type: "local" style: google_manifest: @@ -32,5 +40,5 @@ style: red: 1.0 green: 1.0 blue: 0.9019 - master_template_id: '1LYS5qE4nV9jzcYw5sXwCza25slDfRA1CIg3cs-hCdpU' + master_template_id: "1LYS5qE4nV9jzcYw5sXwCza25slDfRA1CIg3cs-hCdpU" strict_validation: true diff --git a/templates/CohortCore.csv b/templates/CohortCore.csv new file mode 100644 index 0000000..dbcb46a --- /dev/null +++ 
b/templates/CohortCore.csv @@ -0,0 +1 @@ +Lab ID,Sex,Race,Ethnicity,Age,BMI,Age at Menarche,Genetic Alteration,Menopause Status,Gravidity,Parity \ No newline at end of file diff --git a/templates/CohortCorePortal.csv b/templates/CohortCorePortal.csv new file mode 100644 index 0000000..d51a1c8 --- /dev/null +++ b/templates/CohortCorePortal.csv @@ -0,0 +1 @@ +Atlas Participant ID,Atlas Group \ No newline at end of file diff --git a/templates/CohortCoreRFC.csv b/templates/CohortCoreRFC.csv new file mode 100644 index 0000000..346bfcf --- /dev/null +++ b/templates/CohortCoreRFC.csv @@ -0,0 +1 @@ +Class BRCA1,Class BRCA2,Menopause Cycle Stage,Primary Diagnosis,Previous Diagnosis,Tobacco Use,Pack Years Smoked,Alcohol Use,Drinks Per Week,Antibiotic Use \ No newline at end of file diff --git a/templates/ImagingLevel2.csv b/templates/ImagingLevel2.csv new file mode 100644 index 0000000..02f919d --- /dev/null +++ b/templates/ImagingLevel2.csv @@ -0,0 +1,2 @@ +Component,Filename,File Format,Lab ID,Channel Metadata Filename,Imaging Assay Type,Protocol Link,Software and Version,Microscope,Objective,NominalMagnification,LensNA,WorkingDistance,WorkingDistanceUnit,Immersion,Pyramid,Zstack,Tseries,Passed QC,Comment,FOV number,FOVX,FOVXUnit,FOVY,FOVYUnit,Frame Averaging,Image ID,DimensionOrder,PhysicalSizeX,PhysicalSizeXUnit,PhysicalSizeY,PhysicalSizeYUnit,PhysicalSizeZ,PhysicalSizeZUnit,Pixels BigEndian,PlaneCount,SizeC,SizeT,SizeX,SizeY,SizeZ,PixelType +ImagingLevel2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/templates/ScATAC-seqLevel1.csv b/templates/ScATAC-seqLevel1.csv new file mode 100644 index 0000000..81295cf --- /dev/null +++ b/templates/ScATAC-seqLevel1.csv @@ -0,0 +1,2 @@ +Component,Filename,File Format,Lab ID,Nucleic Acid Source,Dissociation Method,Single Nucleus Buffer,Single Cell Isolation Method,Transposition Reaction,scATACseq Library Layout,Nucleus Identifier,Nuclei Barcode Length,Nuclei Barcode Read,scATACseq Read1,scATACseq 
Read2,scATACseq Read3,Library Construction Method,Sequencing Platform,Threshold for Minimum Passing Reads,Total Number of Passing Nuclei,Median Fraction of Reads in Peaks,Median Fraction of Reads in Annotated cis DNA Elements,Median Passing Read Percentage,Median Percentage of Mitochondrial Reads per Nucleus,Technical Replicate Group,Total Reads,Protocol Link,Empty Well Barcode,Peaks Calling Software,Well Index +ScATAC-seqLevel1,,,,,,,,,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/templates/ScRNA-seqLevel1.csv b/templates/ScRNA-seqLevel1.csv new file mode 100644 index 0000000..63fa1ed --- /dev/null +++ b/templates/ScRNA-seqLevel1.csv @@ -0,0 +1,2 @@ +Component,Filename,File Format,Lab ID,Cryopreserved Cells in Sample,Single Cell Isolation Method,Dissociation Method,Library Construction Method,Read Indicator,Read1,Read2,End Bias,Reverse Transcription Primer,Spike In,Sequencing Platform,Total Number of Input Cells,Input Cells and Nuclei,Protocol Link,Technical Replicate Group,Empty Well Barcode,Feature Reference Id,Well Index +ScRNA-seqLevel1,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/templates/ScRNA-seqLevel2.csv b/templates/ScRNA-seqLevel2.csv new file mode 100644 index 0000000..e8a8099 --- /dev/null +++ b/templates/ScRNA-seqLevel2.csv @@ -0,0 +1,2 @@ +Component,Filename,File Format,Parent File ID,scRNAseq Workflow Type,Workflow Version,scRNAseq Workflow Parameters Description,Workflow Link,Genomic Reference,Genomic Reference URL,Genome Annotation URL,Whitelist Cell Barcode File Link,Cell Barcode Tag,UMI Tag,Applied Hard Trimming +ScRNA-seqLevel2,,,,,,,,,,,,,, \ No newline at end of file diff --git a/tests/README.md b/tests/README.md new file mode 100755 index 0000000..edd074d --- /dev/null +++ b/tests/README.md @@ -0,0 +1,45 @@ +## Test documentation + +This describes the testing framework for the data model. +The different types of tests are organized by the subdirectory and are explained below. 
+ +### Test generation of templates + +This means checking expectations that: +1. Production templates can be generated at all with the version of `schematic` used. +2. Production templates look as expected when created (e.g. all the attributes are there). + +Issues in case #1 could mean using an old version of `schematic` that doesn't support new rules, a problem with the data model such as a missing key error, etc. +Issues in case #2 could mean changes in template definitions inadvertently led to a different set of attributes than intended for the template. + +#### Test fixtures + +The fixtures needed are, of course, the data model, as well as the configuration to run, defined in `generate_config.json`. + +### Test validation of manifests against their templates + +This means checking expectations that: +1. Manifest data that should pass are indeed passed by `schematic`. +2. Manifest data that should fail are indeed caught by `schematic`. + +Issues in case #1 lead to a poor experience for data contributors, who wouldn't appreciate spurious validation errors. Issues in case #2 lead to bad data. + +#### Test fixtures + +In addition to the data model, some representative `.csv` manifests are needed. +The file `validate_config.json` defines the matrix of which manifests should be tested with their respective templates. + +### Test submission of manifests (TO DO) + +> Note : This will be more complicated than the other two test suites combined. + +This means checking that: +1. Valid manifests can be submitted at all. There have been cases where valid manifests have been unable to be submitted. +2. Manifest data are transferred correctly to Synapse (e.g. no weird conversions of types or truncation of data). This requires querying the test data that has been transferred to Synapse. + +#### Test fixtures + +This uses the same fixtures above plus a definition of what data should look like in Synapse. 
+ + + diff --git a/tests/generate/config.json b/tests/generate/config.json new file mode 100755 index 0000000..1242036 --- /dev/null +++ b/tests/generate/config.json @@ -0,0 +1,37 @@ +[ + { + "template": "CohortCore", + "title": "Cohort Core Data", + "comment": "" + }, + { + "template": "CohortCoreRFC", + "title": "Cohort Core RFC Data", + "comment": "" + }, + { + "template": "CohortCorePortal", + "title": "Cohort Core Portal Data", + "comment": "" + }, + { + "template": "ImagingLevel2", + "title": "Imaging Level 2 Data", + "comment": "" + }, + { + "template": "ScATAC-seqLevel1", + "title": "scATAC-seq Level 1 Data", + "comment": "" + }, + { + "template": "ScRNA-seqLevel1", + "title": "scRNA-seq Level 1 Data", + "comment": "" + }, + { + "template": "ScRNA-seqLevel2", + "title": "scRNA-seq Level 2 Data", + "comment": "" + } +] diff --git a/tests/generate/generate.sh b/tests/generate/generate.sh new file mode 100755 index 0000000..47ef052 --- /dev/null +++ b/tests/generate/generate.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Generate .csv/Excel templates, to be provided: +# 1) as fallback for contribs who can't access GoogleSheets / DCA (maybe they're blocked by their institution, DCA is down, or just prefer being handed a .csv directly) +# 2) for documentation, i.e. read-only templates are embedded in user docs + +# A config file is expected as the first arg. +# Locally, run test from the root of the repo with `./tests/generate/generate.sh ./tests/generate/config.json`. +# However, most runs use GitHub Actions. 
+
+OUTPUT_DIR=templates
+LOG_DIR=logs
+# Read one array entry per jq output line: titles contain spaces, so plain word-splitting would misalign TITLES with TEMPLATES.
+TEMPLATES=(); while IFS= read -r entry; do TEMPLATES+=("$entry"); done < <(jq -r '.[].template' "$1" | tr -d '"')
+TITLES=(); while IFS= read -r entry; do TITLES+=("$entry"); done < <(jq -r '.[].title' "$1" | tr -d '"')
+
+mkdir -p "$OUTPUT_DIR" "$LOG_DIR"
+
+for i in "${!TEMPLATES[@]}"
+do
+  echo ">>>>>>> Getting ${TEMPLATES[$i]}"
+  # Quote every expansion so a multi-word title reaches schematic as a single --title argument.
+  schematic manifest --config config.yml get -dt "${TEMPLATES[$i]}" --title "${TITLES[$i]}" -o "$OUTPUT_DIR/${TEMPLATES[$i]}.csv" | tee "$LOG_DIR/${TEMPLATES[$i]%.*}_log.txt"
+  sleep 0.5
+done
+
+# Clean up all the intermediates; -f avoids an error when no *.schema.json files were produced.
+rm -f *.schema.json
+echo "Done!"