From abc80be96d12a9099df8b3f39a8323ba81bd787c Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Fri, 12 Apr 2024 15:13:11 -0700 Subject: [PATCH 1/6] Update HTAN.model.csv --- HTAN.model.csv | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index ff302cdd..3f83e0d8 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -6,7 +6,7 @@ Component,"Category of metadata (e.g. Diagnosis, Biospecimen, scRNA-seq Level 1, Patient,HTAN patient,,"Component, HTAN Participant ID",,FALSE,Individual Organism,"Demographics, Family History, Exposure, Follow Up, Diagnosis, Therapy, Molecular Test",, File,A type of Information Content Entity specific to OS,,,,FALSE,Information Content Entity,,https://w3id.org/biolink/vocab/DataFile, Filename,Name of a file,,,,TRUE,,,,regex search ^.+\/\S*$ -File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc",,,TRUE,,,, +File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc, Xenium Experiment",,,TRUE,,,, Checksum,MD5 checksum of the BAM file,,,,TRUE,Information Content Entity,,, HTAN Data File ID,Self-identifier for this data file - HTAN ID of this file HTAN ID SOP (eg HTANx_yyy_zzz),,,,TRUE,File,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))_([0-9]\d*|0000)$ warning HTAN Participant ID,HTAN ID associated with a patient based on HTAN ID SOP (eg HTANx_yyy ),,,,TRUE,Patient,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))$ warning @@ -122,7 +122,7 @@ GeoMx DSP Workflow Parameter Description,Parameters used to run the GeoMx DSP wo GeoMx DSP Workflow Link,Link to workflow or command. DockStore.org recommended. URL,,,,FALSE,Spatial Transcriptomics,,, NanoString GeoMx DSP ROI RCC Segment Annotation Metadata,GeoMx ROI and Segment Metadata Attributes. The assayed biospecimen should be reported one per row with the associated ROI coordinates. ,,"HTAN Parent Biospecimen ID, Scan name, ROI name, Segment name, ROI X Coordinate,ROI Y Coordinate, Tags, QC status, Scan Height, Scan Width, Scan Offset X, Scan Offset Y, Binding Density, Positive norm factor, Surface area, Nuclei count, Tissue Stain",,FALSE,Assay,,, Scan name,GeoMx Scan name (as appears in Segment Summary),,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, -ROI name,ROI name (application generated),,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, +ROI name,"ROI name (application generated). For Xenium this is referred to as the “region name”",,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, Segment name,Name given to segment at time of generation,,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, Tags,Unique descriptor of a variable group (ie. MAPK+),,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, ROI X Coordinate,X location within the image,,,,TRUE,"NanoString GeoMx DSP ROI RCC Segment Annotation Metadata, NanoString GeoMx DSP ROI DCC Segment Annotation Metadata",,, @@ -161,6 +161,11 @@ Stripe Window,"Binning size used for calling significant architectural stripes. Loop Calling,Tool used for identifying loop interactions,,,,TRUE,Sequencing,,, Imaging Level 4,Derived imaging data: Object-by-feature array,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Parent Channel Metadata ID, HTAN Data File ID, Parameter file, Software and Version, Commit SHA,Number of Objects, Number of Features,Imaging Object Class, Imaging Summary Statistic",,FALSE,Assay,Imaging Level 3 Channels,, SRRS Imaging Level 2,SRRS-specific HTAN raw and pre-processed image data,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Data File ID, Channel Metadata Filename, Imaging Assay Type, Protocol Link, Software and Version, Microscope, Objective, NominalMagnification, Pyramid, Zstack, Tseries, Passed QC, Frame Averaging, Image ID, DimensionOrder, PhysicalSizeX, PhysicalSizeXUnit, PhysicalSizeY, PhysicalSizeYUnit, Pixels BigEndian, PlaneCount, SizeC, SizeT, SizeX, SizeY, SizeZ, PixelType",,FALSE,Assay,Biospecimen,, +10X Genomics Xenium ISS Experiment,"All data pertaining to the 10X Genomics Xenium In-Situ Hybridization experiment",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Xenium Bundle Contents, Slide ID, ROI Name, Panel Name, Protocol Link, Software and Version,Total Number of Cells, Total Number of Targets,Surface Area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Decoded Transcripts, Xenium IF image HTAN File ID, Xenium HE image HTAN File ID",,FALSE,Spatial Transcriptomics,Biospecimen,, +Xenium Bundle Contents,A comma separated list of filenames within the Xenium bundle zip file,,,,TRUE,Spatial Transcriptomics,,, +Decoded Transcripts,"In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density",,,,TRUE,Spatial Transcriptomics,,, +Xenium IF image HTAN File ID,The HTAN Data File ID of a Imaging Level 2 file,,,,FALSE,Spatial Transcriptomics,,, +Xenium HE image HTAN File ID,The HTAN Data File ID of a Imaging Level 2 file,,,,FALSE,Spatial Transcriptomics,,, RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is fitted using the monotone increasing B-spline model in the SuperCurve R package. This fits a single curve using all the samples on a slide with the signal intensity as the response variable and the dilution steps as independent variables. The fitted curve is plotted with the signal intensities on the y-axis and the log2-concentration of proteins on the x-axis for diagnostic purposes.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, HTAN RPPA Antibody Table, Assay Type, Protocol Link, Software and Version",,FALSE,Assay,Biospecimen,, HTAN RPPA Antibody Table,A table containing antibody level metadata for RPPA,,"HTAN RPPA Antibody Table ID, Filename, File Format, Ab Name Reported on Dataset, GENCODE Gene Symbol Target, UNIPROT Protein ID Target, Phosphoprotein Flag, Vendor, Catalog Number, Internal Ab ID, Species, RPPA Dilution, Phospho Site, RPPA Validation Status, Clone, Clonality, Antibody Notes",,TRUE,RPPA Level 2,,, RPPA Level 3,Level 3 Reverse Phase Protein Array (RPPA) data contains intra-batch normalized intensities.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, Assay Type, Software and Version, Normalization Method",,FALSE,Assay,Biospecimen,, @@ -1011,4 +1016,4 @@ Tile overlap Y,Percentage of image overlap to allow tile stitching in x directio Barretts Esophagus Goblet Cells Present,Presence or absennce of Barretts esophagus goblet cells.,"Yes, No",,,FALSE,Follow Up,,, Pancreatitis Onset Year,Date of onset of pancreatitis.,,,,FALSE,Follow Up,,,num HTAN Parent Channel Metadata ID,HTAN ID for a level 3 channels table.,,,,TRUE, Imaging Level 4,,, -Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, \ No newline at end of file +Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, From 1b8a7f209827cedc5cdf8df8c740b56de1d17844 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 13 Apr 2024 08:08:15 +0000 Subject: [PATCH 2/6] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 310 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 309 insertions(+), 1 deletion(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index 8a7f0c35..414e8d05 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -1291,6 +1291,9 @@ }, { "@id": "bts:Pkc" + }, + { + "@id": "bts:XeniumExperiment" } ], "sms:displayName": "File Format", @@ -2836,6 +2839,23 @@ "sms:required": "sms:false", "sms:validationRules": [] }, + { + "@id": "bts:XeniumExperiment", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "XeniumExperiment", + "rdfs:subClassOf": [ + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Xenium Experiment", + "sms:required": "sms:false", + "sms:validationRules": [] + }, { "@id": "bts:Checksum", "@type": "rdfs:Class", @@ -41469,7 +41489,7 @@ { "@id": "bts:ROIname", "@type": "rdfs:Class", - "rdfs:comment": "ROI name (application generated)", + "rdfs:comment": "ROI name (application generated). For Xenium this is referred to as the “region name”", "rdfs:label": "ROIname", "rdfs:subClassOf": [ { @@ -43199,6 +43219,294 @@ ], "sms:validationRules": [] }, + { + "@id": "bts:10XGenomicsXeniumISSExperiment", + "@type": "rdfs:Class", + "rdfs:comment": "All data pertaining to the 10X Genomics Xenium In-Situ Hybridization experiment", + "rdfs:label": "10XGenomicsXeniumISSExperiment", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "10X Genomics Xenium ISS Experiment", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Biospecimen" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:Filename" + }, + { + "@id": "bts:FileFormat" + }, + { + "@id": "bts:HTANParentBiospecimenID" + }, + { + "@id": "bts:HTANDataFileID" + }, + { + "@id": "bts:XeniumBundleContents" + }, + { + "@id": "bts:SlideID" + }, + { + "@id": "bts:ROIName" + }, + { + "@id": "bts:PanelName" + }, + { + "@id": "bts:ProtocolLink" + }, + { + "@id": "bts:SoftwareandVersion" + }, + { + "@id": "bts:TotalNumberofCells" + }, + { + "@id": "bts:TotalNumberofTargets" + }, + { + "@id": "bts:SurfaceArea" + }, + { + "@id": "bts:ExperimentIFChannels" + }, + { + "@id": "bts:TranscriptsperCell" + }, + { + "@id": "bts:PercentofTranscriptswithinCells" + }, + { + "@id": "bts:DecodedTranscripts" + }, + { + "@id": "bts:XeniumIFimageHTANFileID" + }, + { + "@id": "bts:XeniumHEimageHTANFileID" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:XeniumBundleContents", + "@type": "rdfs:Class", + "rdfs:comment": "A comma separated list of filenames within the Xenium bundle zip file", + "rdfs:label": "XeniumBundleContents", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Xenium Bundle Contents", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:ROIName", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ROIName", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ROI Name", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:PanelName", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "PanelName", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Panel Name", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:TotalNumberofCells", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "TotalNumberofCells", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Total Number of Cells", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:TotalNumberofTargets", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "TotalNumberofTargets", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Total Number of Targets", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:SurfaceArea", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SurfaceArea", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Surface Area", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:ExperimentIFChannels", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ExperimentIFChannels", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Experiment IF Channels", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:TranscriptsperCell", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "TranscriptsperCell", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Transcripts per Cell", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:PercentofTranscriptswithinCells", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "PercentofTranscriptswithinCells", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Percent of Transcripts within Cells", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:DecodedTranscripts", + "@type": "rdfs:Class", + "rdfs:comment": "In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density", + "rdfs:label": "DecodedTranscripts", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Decoded Transcripts", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:XeniumIFimageHTANFileID", + "@type": "rdfs:Class", + "rdfs:comment": "The HTAN Data File ID of a Imaging Level 2 file", + "rdfs:label": "XeniumIFimageHTANFileID", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Xenium IF image HTAN File ID", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:XeniumHEimageHTANFileID", + "@type": "rdfs:Class", + "rdfs:comment": "The HTAN Data File ID of a Imaging Level 2 file", + "rdfs:label": "XeniumHEimageHTANFileID", + "rdfs:subClassOf": [ + { + "@id": "bts:SpatialTranscriptomics" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Xenium HE image HTAN File ID", + "sms:required": "sms:false", + "sms:validationRules": [] + }, { "@id": "bts:RPPALevel2", "@type": "rdfs:Class", From fe3be3d5d3a91f36155774f661fbf83ac5ae7520 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Mon, 15 Apr 2024 10:06:42 -0700 Subject: [PATCH 3/6] Fixes for missing attributes and case mistakes. Fixes for missing attributes and case mistakes. --- HTAN.model.csv | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 7ba47927..923060b9 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -161,8 +161,14 @@ Stripe Window,"Binning size used for calling significant architectural stripes. Loop Calling,Tool used for identifying loop interactions,,,,TRUE,Sequencing,,, Imaging Level 4,Derived imaging data: Object-by-feature array,,"Component, Filename, File Format, HTAN Parent Data File ID, HTAN Parent Channel Metadata ID, HTAN Data File ID, Parameter file, Software and Version, Commit SHA,Number of Objects, Number of Features,Imaging Object Class, Imaging Summary Statistic",,FALSE,Assay,Imaging Level 3 Channels,, SRRS Imaging Level 2,SRRS-specific HTAN raw and pre-processed image data,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Data File ID, Channel Metadata Filename, Imaging Assay Type, Protocol Link, Software and Version, Microscope, Objective, NominalMagnification, Pyramid, Zstack, Tseries, Passed QC, Frame Averaging, Image ID, DimensionOrder, PhysicalSizeX, PhysicalSizeXUnit, PhysicalSizeY, PhysicalSizeYUnit, Pixels BigEndian, PlaneCount, SizeC, SizeT, SizeX, SizeY, SizeZ, PixelType",,FALSE,Assay,Biospecimen,, -10X Genomics Xenium ISS Experiment,"All data pertaining to the 10X Genomics Xenium In-Situ Hybridization experiment",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Xenium Bundle Contents, Slide ID, ROI Name, Panel Name, Protocol Link, Software and Version,Total Number of Cells, Total Number of Targets,Surface Area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Decoded Transcripts, Xenium IF image HTAN File ID, Xenium HE image HTAN File ID",,FALSE,Spatial Transcriptomics,Biospecimen,, +10X Genomics Xenium ISS Experiment,"All data pertaining to the 10X Genomics Xenium In-Situ Hybridization experiment",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Xenium Bundle Contents, Slide ID, ROI name, Panel Name, Protocol Link, Software and Version,Total Number of Cells, Total Number of Targets, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Decoded Transcripts, Xenium IF image HTAN File ID, Xenium HE image HTAN File ID",,FALSE,Spatial Transcriptomics,Biospecimen,, Xenium Bundle Contents,A comma separated list of filenames within the Xenium bundle zip file,,,,TRUE,Spatial Transcriptomics,,, +Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, +Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, +Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, +Experiment IF Channels,"A comma-separated list with any number of channels the user deems appropriate(Example: PanCK, CD45, CD3, DAPI)",,,,TRUE,Spatial Transcriptomics,,, +Transcripts per Cell,Mean or Median transcript count per cell analyzed on the flow cell or slide,,,,TRUE,Spatial Transcriptomics,,, +Percent of Transcripts within Cells,The percentage of transcripts assigned to assayed cells,,,,TRUE,Spatial Transcriptomics,,, Decoded Transcripts,"In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density",,,,TRUE,Spatial Transcriptomics,,, Xenium IF image HTAN File ID,The HTAN Data File ID of a Imaging Level 2 file,,,,FALSE,Spatial Transcriptomics,,, Xenium HE image HTAN File ID,The HTAN Data File ID of a Imaging Level 2 file,,,,FALSE,Spatial Transcriptomics,,, From db57efd4cd113a61c7ae74285aa4a80448accdcb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:13:31 +0000 Subject: [PATCH 4/6] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 74 +++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 54 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index 414e8d05..a4e16fc7 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -43262,7 +43262,7 @@ "@id": "bts:SlideID" }, { - "@id": "bts:ROIName" + "@id": "bts:ROIname" }, { "@id": "bts:PanelName" @@ -43280,7 +43280,7 @@ "@id": "bts:TotalNumberofTargets" }, { - "@id": "bts:SurfaceArea" + "@id": "bts:Surfacearea" }, { "@id": "bts:ExperimentIFChannels" @@ -43320,140 +43320,106 @@ "sms:required": "sms:true", "sms:validationRules": [] }, - { - "@id": "bts:ROIName", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ROIName", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "ROI Name", - "sms:required": "sms:false", - "sms:validationRules": [] - }, { "@id": "bts:PanelName", "@type": "rdfs:Class", - "rdfs:comment": "TBD", + "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name", "rdfs:label": "PanelName", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, "sms:displayName": "Panel Name", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [] }, { "@id": "bts:TotalNumberofCells", "@type": "rdfs:Class", - "rdfs:comment": "TBD", + "rdfs:comment": "The total number of cells analyzed on the flow cell", "rdfs:label": "TotalNumberofCells", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, "sms:displayName": "Total Number of Cells", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [] }, { "@id": "bts:TotalNumberofTargets", "@type": "rdfs:Class", - "rdfs:comment": "TBD", + "rdfs:comment": "Refers to the target of an assay. Can be genes/transcripts or probes", "rdfs:label": "TotalNumberofTargets", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, "sms:displayName": "Total Number of Targets", - "sms:required": "sms:false", - "sms:validationRules": [] - }, - { - "@id": "bts:SurfaceArea", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "SurfaceArea", - "rdfs:subClassOf": [ - { - "@id": "bts:Thing" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Surface Area", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [] }, { "@id": "bts:ExperimentIFChannels", "@type": "rdfs:Class", - "rdfs:comment": "TBD", + "rdfs:comment": "A comma-separated list with any number of channels the user deems appropriate(Example: PanCK, CD45, CD3, DAPI)", "rdfs:label": "ExperimentIFChannels", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, "sms:displayName": "Experiment IF Channels", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [] }, { "@id": "bts:TranscriptsperCell", "@type": "rdfs:Class", - "rdfs:comment": "TBD", + "rdfs:comment": "Mean or Median transcript count per cell analyzed on the flow cell or slide", "rdfs:label": "TranscriptsperCell", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, "sms:displayName": "Transcripts per Cell", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [] }, { "@id": "bts:PercentofTranscriptswithinCells", "@type": "rdfs:Class", - "rdfs:comment": "TBD", + "rdfs:comment": "The percentage of transcripts assigned to assayed cells", "rdfs:label": "PercentofTranscriptswithinCells", "rdfs:subClassOf": [ { - "@id": "bts:Thing" + "@id": "bts:SpatialTranscriptomics" } ], "schema:isPartOf": { "@id": "http://schema.biothings.io" }, "sms:displayName": "Percent of Transcripts within Cells", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [] }, { From f731914ca3a125453071f19e2821c7fecde32bc9 Mon Sep 17 00:00:00 2001 From: PozhidayevaDarya <106110175+PozhidayevaDarya@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:40:17 -0700 Subject: [PATCH 5/6] Update HTAN.model.csv --- HTAN.model.csv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HTAN.model.csv b/HTAN.model.csv index 735c36c6..f80a2738 100644 --- a/HTAN.model.csv +++ b/HTAN.model.csv @@ -6,7 +6,7 @@ Component,"Category of metadata (e.g. Diagnosis, Biospecimen, scRNA-seq Level 1, Patient,HTAN patient,,"Component, HTAN Participant ID",,FALSE,Individual Organism,"Demographics, Family History, Exposure, Follow Up, Diagnosis, Therapy, Molecular Test",, File,A type of Information Content Entity specific to OS,,,,FALSE,Information Content Entity,,https://w3id.org/biolink/vocab/DataFile, Filename,Name of a file,,,,TRUE,,,,regex search ^.+\/\S*$ -File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc, Xenium Experiment",,,TRUE,,,, +File Format,"Format of a file (e.g. txt, csv, fastq, bam, etc.)","hdf5, bedgraph, idx, idat, bam, bai, excel, powerpoint, tif, tiff, OME-TIFF, png, doc, pdf, fasta, fastq, sam, vcf, bcf, maf, bed, chp, cel, sif, tsv, csv, txt, plink, bigwig, wiggle, gct, bgzip, zip, seg, html, mov, hyperlink, svs, md, flagstat, gtf, raw, msf, rmd, bed narrowPeak, bed broadPeak, bed gappedPeak, avi, pzfx, fig, xml, tar, R script, abf, bpm, dat, jpg, locs, Sentrix descriptor file, Python script, sav, gzip, sdf, RData, hic, ab1, 7z, gff3, json, sqlite, svg, sra, recal, tranches, mtx, tagAlign, dup, DICOM, czi, mex, cloupe, am, cell am, mpg, m, mzML,scn, dcc, rcc, pkc",,,TRUE,,,, Checksum,MD5 checksum of the BAM file,,,,TRUE,Information Content Entity,,, HTAN Data File ID,Self-identifier for this data file - HTAN ID of this file HTAN ID SOP (eg HTANx_yyy_zzz),,,,TRUE,File,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))_([0-9]\d*|0000)$ warning HTAN Participant ID,HTAN ID associated with a patient based on HTAN ID SOP (eg HTANx_yyy ),,,,TRUE,Patient,,https://docs.google.com/document/d/1podtPP8L1UNvVxx9_c_szlDcU1f8n7bige6XA_GoRVM/edit?usp=sharing,regex match ^(HTA([1-9]|1[0-6]))_((EXT)?([0-9]\d*|0000))$ warning @@ -163,13 +163,13 @@ Imaging Level 4,Derived imaging data: Object-by-feature array,,"Component, Filen SRRS Imaging Level 2,SRRS-specific HTAN raw and pre-processed image data,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Data File ID, Channel Metadata Filename, Imaging Assay Type, Protocol Link, Software and Version, Microscope, Objective, NominalMagnification, Pyramid, Zstack, Tseries, Passed QC, Frame Averaging, Image ID, DimensionOrder, PhysicalSizeX, PhysicalSizeXUnit, PhysicalSizeY, PhysicalSizeYUnit, Pixels BigEndian, PlaneCount, SizeC, SizeT, SizeX, SizeY, SizeZ, PixelType",,FALSE,Assay,Biospecimen,, 10X Genomics Xenium ISS Experiment,"All data pertaining to the 10X Genomics Xenium In-Situ Hybridization experiment",,"Component, Filename, File Format, HTAN Parent Biospecimen ID, HTAN Data File ID, Xenium Bundle Contents, Slide ID, ROI name, Panel Name, Protocol Link, Software and Version,Total Number of Cells, Total Number of Targets, Surface area, Experiment IF Channels, Transcripts per Cell, Percent of Transcripts within Cells, Decoded Transcripts, Xenium IF image HTAN File ID, Xenium HE image HTAN File ID",,FALSE,Spatial Transcriptomics,Biospecimen,, Xenium Bundle Contents,A comma separated list of filenames within the Xenium bundle zip file,,,,TRUE,Spatial Transcriptomics,,, -Panel Name,The human-readable panel name. This could be the Gene Panel name or Protein Panel name,,,,TRUE,Spatial Transcriptomics,,, +Panel Name,"The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex))",,,,TRUE,Spatial Transcriptomics,,, Total Number of Cells,The total number of cells analyzed on the flow cell,,,,TRUE,Spatial Transcriptomics,,, Total Number of Targets,Refers to the target of an assay. Can be genes/transcripts or probes,,,,TRUE,Spatial Transcriptomics,,, Experiment IF Channels,"A comma-separated list with any number of channels the user deems appropriate(Example: PanCK, CD45, CD3, DAPI)",,,,TRUE,Spatial Transcriptomics,,, Transcripts per Cell,Mean or Median transcript count per cell analyzed on the flow cell or slide,,,,TRUE,Spatial Transcriptomics,,, Percent of Transcripts within Cells,The percentage of transcripts assigned to assayed cells,,,,TRUE,Spatial Transcriptomics,,, -Decoded Transcripts,"In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density",,,,TRUE,Spatial Transcriptomics,,, +Decoded Transcripts,"In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density (units are reported in 100um^2).",,,,TRUE,Spatial Transcriptomics,,, Xenium IF image HTAN File ID,The HTAN Data File ID of a Imaging Level 2 file,,,,FALSE,Spatial Transcriptomics,,, Xenium HE image HTAN File ID,The HTAN Data File ID of a Imaging Level 2 file,,,,FALSE,Spatial Transcriptomics,,, RPPA Level 2,Array based protemics. Each dilution curve of spot intensities is fitted using the monotone increasing B-spline model in the SuperCurve R package. This fits a single curve using all the samples on a slide with the signal intensity as the response variable and the dilution steps as independent variables. The fitted curve is plotted with the signal intensities on the y-axis and the log2-concentration of proteins on the x-axis for diagnostic purposes.,,"Component, Filename, File Format, HTAN Participant ID, HTAN Parent Biospecimen ID, HTAN Parent Data File ID, HTAN Data File ID, HTAN RPPA Antibody Table, Assay Type, Protocol Link, Software and Version",,FALSE,Assay,Biospecimen,, @@ -1022,4 +1022,4 @@ Tile overlap Y,Percentage of image overlap to allow tile stitching in x directio Barretts Esophagus Goblet Cells Present,Presence or absennce of Barretts esophagus goblet cells.,"Yes, No",,,FALSE,Follow Up,,, Pancreatitis Onset Year,Date of onset of pancreatitis.,,,,FALSE,Follow Up,,,num HTAN Parent Channel Metadata ID,HTAN ID for a level 3 channels table.,,,,TRUE, Imaging Level 4,,, -Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, \ No newline at end of file +Single Nucleus Capture,Nuclei isolation method,"Plates, 10x, droplet",,,FALSE,scmC-seq Level 1,,, From 028fbe6308599d31fd0383628f39e90bb9cc83f2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 15:31:27 +0000 Subject: [PATCH 6/6] GitHub Action: convert *.model.csv to *.model.jsonld --- HTAN.model.jsonld | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/HTAN.model.jsonld b/HTAN.model.jsonld index 357344bf..eb787449 100644 --- a/HTAN.model.jsonld +++ b/HTAN.model.jsonld @@ -1291,9 +1291,6 @@ }, { "@id": "bts:Pkc" - }, - { - "@id": "bts:XeniumExperiment" } ], "sms:displayName": "File Format", @@ -2839,23 +2836,6 @@ "sms:required": "sms:false", "sms:validationRules": [] }, - { - "@id": "bts:XeniumExperiment", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "XeniumExperiment", - "rdfs:subClassOf": [ - { - "@id": "bts:FileFormat" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Xenium Experiment", - "sms:required": "sms:false", - "sms:validationRules": [] - }, { "@id": "bts:Checksum", "@type": "rdfs:Class", @@ -43341,7 +43321,7 @@ { "@id": "bts:PanelName", "@type": "rdfs:Class", - "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name", + "rdfs:comment": "The human-readable panel name. This could be the Gene Panel name or Protein Panel name. In Xenium, this refers to the string entered as the name in panel specification (e.g. Xenium Human Immuno-Oncology Add-on B Gene Expression). In CosMx, this refers to the panel name as it appears in the CosMx catalog (e.g. CosMx Human Universal Cell Characterization Panel (1000-plex))", "rdfs:label": "PanelName", "rdfs:subClassOf": [ { @@ -43443,7 +43423,7 @@ { "@id": "bts:DecodedTranscripts", "@type": "rdfs:Class", - "rdfs:comment": "In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density", + "rdfs:comment": "In Xenium, this is the number of high-quality, decoded-to-gene nuclear transcripts divided by the total segmented nuclear area to get a transcript density (units are reported in 100um^2).", "rdfs:label": "DecodedTranscripts", "rdfs:subClassOf": [ {