From 7557e19c6e73e80dea152ef7fd9d010f727b0db2 Mon Sep 17 00:00:00 2001 From: aditigopalan <63365451+aditigopalan@users.noreply.github.com> Date: Thu, 9 May 2024 08:56:52 -0400 Subject: [PATCH 1/5] Update HTAN.model.csv --- HTAN.model.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index ef3ec35f..20cb1394 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -6,7 +6,7 @@ Component,"Category of metadata (e.g. Diagnosis, Biospecimen, scRNA-seq Level 1, Patient,HTAN patient,,"Component, HTAN Participant ID",,FALSE,Individual Organism,"Demographics, Family History, Exposure, Follow Up, Diagnosis, Therapy, Molecular Test",, File,A type of Information Content Entity specific to OS,,,,FALSE,Information Content Entity,,https://w3id.org/biolink/vocab/DataFile, Filename,Name of a file,,,,TRUE,,,,regex search ^.+\/\S*$ -File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc, sf",,,TRUE,,,, +File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc, sf, bedpe",,,TRUE,,,, Checksum,MD5 checksum of the BAM file,,,,TRUE,Information Content Entity,,, HTAN Data File ID,Self-identifier for this data file - HTAN ID of this file HTAN ID SOP (eg HTANx_yyy_zzz),,,,TRUE,File,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))_([0-9]\d*|0000)$ warning HTAN Participant ID,HTAN ID associated with a patient based on HTAN ID SOP (eg HTANx_yyy ),,,,TRUE,Patient,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))$ warning @@ -155,7 +155,7 @@ Ligation Condition,Name of ligase and condition for proximity ligation,,,,TRUE,S Biotin Enrichment,Whether biotin is used for enriching ligation product,"Yes, No",,,TRUE,Sequencing,,, DNA Input Amount,"Amount of DNA for library construction, in nanograms.",,,,TRUE,Sequencing,,,int Resolution,"Binning size used for generating contact matrix, in basepair.",,,,TRUE,Sequencing,,, -Stripe Calling,"Tool used for identifying architectural stripe-forming, interaction hotspots.","MACS2, Other",,,TRUE,Sequencing,,, +Stripe Calling,"Tool used for identifying architectural stripe-forming, interaction hotspots.","MACS2, Other",,,TRUE,Sequencing,,,list::-?\d+ Loop Window,Binning size used for calling significant dot interactions (loops),,,,TRUE,Sequencing,,,int Stripe Window,"Binning size used for calling significant architectural stripes. Can be an integer or int/int, indicating bin size and sliding window size if different.","HiCCUPS, Cooltools, Other",,,TRUE,Sequencing,,, Loop Calling,Tool used for identifying loop interactions,,,,TRUE,Sequencing,,, @@ -1035,4 +1035,4 @@ Tile overlap Y,Percentage of image overlap to allow tile stitching in x directio Barretts Esophagus Goblet Cells Present,Presence or absennce of Barretts esophagus goblet cells.,"Yes, No",,,FALSE,Follow Up,,, Pancreatitis Onset Year,Date of onset of pancreatitis.,,,,FALSE,Follow Up,,,num HTAN Parent Channel Metadata ID,HTAN ID for a level 3 channels table.,,,,TRUE, Imaging Level 4,,, -Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, +Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, \ No newline at end of file From f4889fde9225ce6443eb9c9d462588bc82284b58 Mon Sep 17 00:00:00 2001 From: aditigopalan <63365451+aditigopalan@users.noreply.github.com> Date: Thu, 9 May 2024 08:58:31 -0400 Subject: [PATCH 2/5] Update HTAN.model.csv --- HTAN.model.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 20cb1394..5014710c 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -1035,4 +1035,4 @@ Tile overlap Y,Percentage of image overlap to allow tile stitching in x directio Barretts Esophagus Goblet Cells Present,Presence or absennce of Barretts esophagus goblet cells.,"Yes, No",,,FALSE,Follow Up,,, Pancreatitis Onset Year,Date of onset of pancreatitis.,,,,FALSE,Follow Up,,,num HTAN Parent Channel Metadata ID,HTAN ID for a level 3 channels table.,,,,TRUE, Imaging Level 4,,, -Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, \ No newline at end of file +Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, From 45a916021eec279ade24e9bb7123898dfe85b987 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 May 2024 13:04:59 +0000 Subject: [PATCH 3/5] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index de8bd16b..95efae1a 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -1294,6 +1294,9 @@ }, { "@id": "bts:Sf" + }, + { + "@id": "bts:Bedpe" } ], "sms:displayName": "File Format", @@ -2856,6 +2859,23 @@ "sms:required": "sms:false", "sms:validationRules": [] }, + { + "@id": "bts:Bedpe", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Bedpe", + "rdfs:subClassOf": [ + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "bedpe", + "sms:required": "sms:false", + "sms:validationRules": [] + }, { "@id": "bts:Checksum", "@type": "rdfs:Class", @@ -42299,7 +42319,10 @@ ], "sms:displayName": "Stripe Calling", "sms:required": "sms:true", - "sms:validationRules": [] + "sms:validationRules": [ + "list", + "-?\\d+" + ] }, { "@id": "bts:LoopWindow", From 405cc747a69ab78338606d8c844de27bbd331c4e Mon Sep 17 00:00:00 2001 From: aditigopalan <63365451+aditigopalan@users.noreply.github.com> Date: Tue, 28 May 2024 11:56:09 -0400 Subject: [PATCH 4/5] Loop and stripe window update Updating Loop & Stripe Window --- HTAN.model.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 20cb1394..22ae7617 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -156,9 +156,9 @@ Biotin Enrichment,Whether biotin is used for enriching ligation product,"Yes, No DNA Input Amount,"Amount of DNA for library construction, in nanograms.",,,,TRUE,Sequencing,,,int Resolution,"Binning size used for generating contact matrix, in basepair.",,,,TRUE,Sequencing,,, Stripe Calling,"Tool used for identifying architectural stripe-forming, interaction hotspots.","MACS2, Other",,,TRUE,Sequencing,,,list::-?\d+ -Loop Window,Binning size used for calling significant dot interactions (loops),,,,TRUE,Sequencing,,,int -Stripe Window,"Binning size used for calling significant architectural stripes. Can be an integer or int/int, indicating bin size and sliding window size if different.","HiCCUPS, Cooltools, Other",,,TRUE,Sequencing,,, -Loop Calling,Tool used for identifying loop interactions,,,,TRUE,Sequencing,,, +Loop Window,Binning size used for calling significant dot interactions (loops),,,,TRUE,Sequencing,,,list like :: regex search -?\d+ +Stripe Window,Binning size used for calling significant architectural stripes. Can be an integer or comma-separated list of integers indicating bin size and sliding window size if different.,,,,TRUE,Sequencing,,,list like :: regex search -?\d+ +Loop Calling,Tool used for identifying loop interactions,"HiCCUPS, Cooltools, Other",,,TRUE,Sequencing,,, Imaging Level 4,Derived imaging data: Object-by-feature array,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Parent Channel Metadata ID, HTAN Data File ID, Parameter file, Software and Version, Commit SHA,Number of Objects, Number of Features,Imaging Object Class, Imaging Summary Statistic",,FALSE,Assay,Imaging Level 3 Channels,, SRRS Imaging Level 2,SRRS-specific HTAN raw and pre-processed image data,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Data File ID, Channel Metadata Filename, Imaging Assay Type, Protocol Link, Software and Version, Microscope, Objective, NominalMagnification, Pyramid, Zstack, Tseries, Passed QC, Frame Averaging, Image ID, DimensionOrder, PhysicalSizeX, PhysicalSizeXUnit, PhysicalSizeY, PhysicalSizeYUnit, Pixels BigEndian, PlaneCount, SizeC, SizeT, SizeX, SizeY, SizeZ, PixelType",,FALSE,Assay,Biospecimen,, 10X Genomics Xenium ISS Experiment,All data pertaining to the 10X Genomics Xenium In-Situ Hybridization experiment,,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Xenium Bundle Contents, Slide ID, ROI name, Panel Name, Protocol Link, Software and Version,Total Number of Cells, Total Number of Targets, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Decoded Transcripts, Xenium IF image HTAN File ID, Xenium HE image HTAN File ID",,FALSE,Spatial Transcriptomics,Biospecimen,, From f900ba5dec27b02dbcf9e5e16c78d1e289999e20 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 28 May 2024 16:03:47 +0000 Subject: [PATCH 5/5] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index 95efae1a..fd371b30 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -42340,13 +42340,14 @@ "sms:displayName": "Loop Window", "sms:required": "sms:true", "sms:validationRules": [ - "int" + "list like ", + " regex search -?\\d+" ] }, { "@id": "bts:StripeWindow", "@type": "rdfs:Class", - "rdfs:comment": "Binning size used for calling significant architectural stripes. Can be an integer or int/int, indicating bin size and sliding window size if different.", + "rdfs:comment": "Binning size used for calling significant architectural stripes. Can be an integer or comma-separated list of integers indicating bin size and sliding window size if different.", "rdfs:label": "StripeWindow", "rdfs:subClassOf": [ { @@ -42356,20 +42357,12 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, - "schema:rangeIncludes": [ - { - "@id": "bts:HiCCUPS" - }, - { - "@id": "bts:Cooltools" - }, - { - "@id": "bts:Other" - } - ], "sms:displayName": "Stripe Window", "sms:required": "sms:true", - "sms:validationRules": [] + "sms:validationRules": [ + "list like ", + " regex search -?\\d+" + ] }, { "@id": "bts:LoopCalling", @@ -42384,6 +42377,17 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, + "schema:rangeIncludes": [ + { + "@id": "bts:HiCCUPS" + }, + { + "@id": "bts:Cooltools" + }, + { + "@id": "bts:Other" + } + ], "sms:displayName": "Loop Calling", "sms:required": "sms:true", "sms:validationRules": [] @@ -42782,7 +42786,7 @@ "@id": "bts:StripeCalling" }, { - "@id": "bts:StripeWindow" + "@id": "bts:LoopCalling" }, { "@id": "bts:HistologyAssessmentBy" @@ -43006,7 +43010,7 @@ "rdfs:label": "HiCCUPS", "rdfs:subClassOf": [ { - "@id": "bts:StripeWindow" + "@id": "bts:LoopCalling" } ], "schema:isPartOf": { @@ -43023,7 +43027,7 @@ "rdfs:label": "Cooltools", "rdfs:subClassOf": [ { - "@id": "bts:StripeWindow" + "@id": "bts:LoopCalling" } ], "schema:isPartOf": {