Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VALIS-40-update-genome-preprocess #31

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.deleteDirectory = void 0;
const Terminal_1 = require("./Terminal");
const fs = require("fs");
function deleteDirectory(directory) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Terminal = void 0;
const util = require("util");
const process = require("process");
class Terminal {
Expand Down Expand Up @@ -56,8 +57,8 @@ class Terminal {
}
}
}
Terminal.currentRewriteId = undefined;
exports.Terminal = Terminal;
Terminal.currentRewriteId = undefined;
exports.default = Terminal;
var FormatFlag;
(function (FormatFlag) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Tileset = void 0;
class Tileset {
constructor(tileSize) {
this.tileSize = tileSize;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const AnnotationTypes_1 = require("../../../../src/track/annotation/AnnotationTypes");
exports.AnnotationTileset = void 0;
const AnnotationTypes_1 = require("./AnnotationTypes");
class AnnotationTileset {
constructor(tileSize, topLevelOnly, onUnknownFeature, onError) {
this.tileSize = tileSize;
Expand Down Expand Up @@ -40,17 +41,17 @@ class AnnotationTileset {
let isTranscript = AnnotationTypes_1.SoTranscriptClass.instance[c.type] !== undefined;
return isTranscript ? (p + 1) : p;
}, 0);
let gene = Object.assign({}, featureCommon, { type: AnnotationTypes_1.GenomeFeatureType.Gene, class: AnnotationTypes_1.SoGeneClass.instance[feature.type], strand: feature.strand, transcriptCount: transcriptCount });
let gene = Object.assign(Object.assign({}, featureCommon), { type: AnnotationTypes_1.GenomeFeatureType.Gene, class: AnnotationTypes_1.SoGeneClass.instance[feature.type], strand: feature.strand, transcriptCount: transcriptCount });
tile.content.push(gene);
}
else if (AnnotationTypes_1.SoTranscriptClass.instance[feature.type] !== undefined) {
// is transcript
let transcript = Object.assign({}, featureCommon, { type: AnnotationTypes_1.GenomeFeatureType.Transcript, class: AnnotationTypes_1.SoTranscriptClass.instance[feature.type] });
let transcript = Object.assign(Object.assign({}, featureCommon), { type: AnnotationTypes_1.GenomeFeatureType.Transcript, class: AnnotationTypes_1.SoTranscriptClass.instance[feature.type] });
tile.content.push(transcript);
}
else if (AnnotationTypes_1.SoTranscriptComponentClass.instance[feature.type] !== undefined) {
// is transcript component
let info = Object.assign({}, featureCommon, { type: AnnotationTypes_1.GenomeFeatureType.TranscriptComponent, class: AnnotationTypes_1.SoTranscriptComponentClass.instance[feature.type] });
let info = Object.assign(Object.assign({}, featureCommon), { type: AnnotationTypes_1.GenomeFeatureType.TranscriptComponent, class: AnnotationTypes_1.SoTranscriptComponentClass.instance[feature.type] });
if (feature.phase != null) {
info.phase = feature.phase;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SoTranscriptComponentClass = exports.SoTranscriptClass = exports.SoGeneClass = exports.TranscriptComponentClass = exports.TranscriptClass = exports.GeneClass = exports.GenomeFeatureType = exports.Strand = void 0;
var Strand;
(function (Strand) {
Strand["None"] = ".";
Expand Down Expand Up @@ -50,8 +51,8 @@ class SoGeneClass {
this['pseudogene'] = GeneClass.Pseudo;
}
}
SoGeneClass.instance = new SoGeneClass();
exports.SoGeneClass = SoGeneClass;
SoGeneClass.instance = new SoGeneClass();
class SoTranscriptClass {
constructor() {
this['transcript'] = TranscriptClass.Unspecified;
Expand All @@ -66,8 +67,8 @@ class SoTranscriptClass {
this['snRNA'] = TranscriptClass.NonProteinCoding;
}
}
SoTranscriptClass.instance = new SoTranscriptClass();
exports.SoTranscriptClass = SoTranscriptClass;
SoTranscriptClass.instance = new SoTranscriptClass();
class SoTranscriptComponentClass {
constructor() {
this['CDS'] = TranscriptComponentClass.ProteinCodingSequence;
Expand All @@ -76,5 +77,5 @@ class SoTranscriptComponentClass {
this['three_prime_UTR'] = TranscriptComponentClass.Untranslated;
}
}
SoTranscriptComponentClass.instance = new SoTranscriptComponentClass();
exports.SoTranscriptComponentClass = SoTranscriptComponentClass;
SoTranscriptComponentClass.instance = new SoTranscriptComponentClass();
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* - A 'gene' by default is a protein-coding gene
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.gff3Convert = void 0;
const fs = require("fs");
const path = require("path");
const AnnotationTileset_1 = require("./AnnotationTileset");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getBranches = exports.printSummary = void 0;
const Terminal_1 = require("../Terminal");
function printSummary(features) {
Terminal_1.default.log('Parsing complete\n');
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.vcfConvert = void 0;
const fs = require("fs");
const path = require("path");
const Terminal_1 = require("../Terminal");
Expand Down Expand Up @@ -37,11 +38,10 @@ function vcfConvert(inputFilePath, outputDirectory) {
alts: feature.ALT.split('')
});
}
// @! temporary for demo
let species = 'l_fortunei';
saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${species.toLowerCase()}.vvariants-dir/${species.toLowerCase()}`);
let inputFilename = path.basename(inputFilePath);
saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${inputFilename.toLowerCase()}.vvariants-dir/${inputFilename.toLowerCase()}`);
// @! temporary, save out genes for biobureau demo
filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf));
// filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf));
resolve(filesWritten);
}
});
Expand Down Expand Up @@ -255,7 +255,7 @@ class VCFParser {
}
this.callbacks.onComplete(this.output);
};
this.callbacks = Object.assign({}, this.callbacks, callbacks);
this.callbacks = Object.assign(Object.assign({}, this.callbacks), callbacks);
}
onMetaLine(line) {
if (line.trim() === '')
Expand Down
22 changes: 11 additions & 11 deletions tools/genome-preprocess/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tools/genome-preprocess/src/gff3/AnnotationTileset.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Feature } from "genomics-formats/lib/gff3/Feature";
import { SoGeneClass, SoTranscriptClass, GenomeFeature, GenomeFeatureType, SoTranscriptComponentClass, GeneInfo, TranscriptComponentInfo, TranscriptInfo } from "../../../../src/track/annotation/AnnotationTypes";
import { SoGeneClass, SoTranscriptClass, GenomeFeature, GenomeFeatureType, SoTranscriptComponentClass, GeneInfo, TranscriptComponentInfo, TranscriptInfo } from "./AnnotationTypes";

export type AnnotationTile = {
startIndex: number,
Expand Down
118 changes: 118 additions & 0 deletions tools/genome-preprocess/src/gff3/AnnotationTypes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/**
 * Strand designation of a feature.
 *
 * Each member's runtime value is the one-character symbol shown next to it
 * (presumably the symbols used by the GFF3 strand column — confirm against
 * the parser that produces these values).
 */
export enum Strand {
    /** Serialised as ".". */
    None = ".",
    /** Serialised as "?". */
    Unknown = "?",
    /** Serialised as "+". */
    Positive = "+",
    /** Serialised as "-". */
    Negative = "-",
}

/**
 * Discriminator for the kinds of genome feature records.
 *
 * The numeric values are spelled out explicitly; they are the same values the
 * compiler assigns implicitly (0, 1, 2). Member order corresponds to nesting
 * depth.
 */
export enum GenomeFeatureType {
    Gene = 0,
    Transcript = 1,
    TranscriptComponent = 2,
}

/**
 * Base shape shared by every genome feature record.
 *
 * `type` is the tag that the more specific record interfaces narrow to a
 * single `GenomeFeatureType` member.
 */
export interface GenomeFeature {
    type: GenomeFeatureType;
}

/**
 * Coarse gene classification.
 *
 * This is a small, simplified subset of the types specified in the Sequence
 * Ontology. Numeric values are written out explicitly and equal the values
 * the compiler would assign implicitly.
 */
export enum GeneClass {
    Unspecified = 0,
    /** Assumed default. */
    ProteinCoding = 1,
    /** Also known as regulatory. */
    NonProteinCoding = 2,
    /** Non-functional imperfect copy. */
    Pseudo = 3,
}

/**
 * Record describing a gene; discriminated by `type === GenomeFeatureType.Gene`.
 *
 * `class` holds the simplified `GeneClass`, while `soClass` is typed as a key
 * of `SoGeneClass`. Because `SoGeneClass` declares a string index signature,
 * `keyof SoGeneClass` is `string | number`, not only the explicitly declared
 * term names.
 */
export interface GeneInfo extends GenomeFeature {
    type: GenomeFeatureType.Gene;
    name?: string;
    startIndex: number;
    length: number;
    strand: Strand;
    class: GeneClass;
    soClass: keyof SoGeneClass;
    transcriptCount: number;
    score?: number;
}

/**
 * Coarse transcript classification.
 *
 * Numeric values are written out explicitly and equal the values the compiler
 * would assign implicitly.
 */
export enum TranscriptClass {
    Unspecified = 0,
    /** Also known as protein coding RNA. */
    ProteinCoding = 1,
    /**
     * Non-protein coding. Sub-types include:
     *   - Ribosomal
     *   - Transfer
     *   - Small nuclear
     *   - Small nucleolar
     */
    NonProteinCoding = 2,
}

/**
 * Mature transcript – the transcript after processing.
 *
 * Discriminated by `type === GenomeFeatureType.Transcript`. `class` holds the
 * simplified `TranscriptClass`; `soClass` is typed as a key of
 * `SoTranscriptClass` (which, given that class's string index signature,
 * admits any `string | number`).
 */
export interface TranscriptInfo extends GenomeFeature {
    type: GenomeFeatureType.Transcript;
    name?: string;
    startIndex: number;
    length: number;
    class: TranscriptClass;
    soClass: keyof SoTranscriptClass;
}

/**
 * Classification of the parts that make up a transcript.
 *
 * Numeric values are written out explicitly and equal the values the compiler
 * would assign implicitly.
 */
export enum TranscriptComponentClass {
    Exon = 0,
    Untranslated = 1,
    ProteinCodingSequence = 2,
}

/**
 * Record describing one component of a transcript; discriminated by
 * `type === GenomeFeatureType.TranscriptComponent`.
 *
 * `class` holds the simplified `TranscriptComponentClass`; `soClass` is typed
 * as a key of `SoTranscriptComponentClass`.
 */
export interface TranscriptComponentInfo extends GenomeFeature {
    type: GenomeFeatureType.TranscriptComponent;
    name?: string;
    startIndex: number;
    length: number;
    class: TranscriptComponentClass;
    soClass: keyof SoTranscriptComponentClass;
    /**
     * Optional phase value; see
     * https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#description-of-the-format
     */
    phase?: number;
}

/**
 * Lookup table from Sequence Ontology (SO) gene term names to `GeneClass`.
 *
 * Only a small subset of SO terms found in the Ensembl gff3 files is covered;
 * for a more complete set, we should use data from
 * https://github.com/The-Sequence-Ontology/SO-Ontologies
 *
 * Use the shared `SoGeneClass.instance` and index it with a term name; a term
 * that is not declared here is typed as `undefined` by the index signature.
 *
 * NOTE(review): at runtime an index lookup also reaches members inherited
 * from `Object.prototype` (e.g. `'constructor'`, `'toString'`), which are not
 * `undefined` — confirm callers never pass such strings as a term name.
 */
export class SoGeneClass {
    /** Single shared instance used for lookups. */
    static readonly instance = new SoGeneClass();

    /** Any string may be used as a key; unknown terms resolve to `undefined`. */
    [key: string]: undefined | GeneClass;

    readonly gene = GeneClass.Unspecified;
    readonly ncRNA_gene = GeneClass.NonProteinCoding;
    readonly pseudogene = GeneClass.Pseudo;
}

/**
 * Lookup table from Sequence Ontology transcript term names to
 * `TranscriptClass`.
 *
 * Use the shared `SoTranscriptClass.instance` and index it with a term name;
 * a term that is not declared here is typed as `undefined` by the index
 * signature.
 *
 * NOTE(review): at runtime an index lookup also reaches members inherited
 * from `Object.prototype` (e.g. `'constructor'`), which are not `undefined` —
 * confirm callers never pass such strings as a term name.
 */
export class SoTranscriptClass {
    /** Single shared instance used for lookups. */
    static readonly instance = new SoTranscriptClass();

    /** Any string may be used as a key; unknown terms resolve to `undefined`. */
    [key: string]: undefined | TranscriptClass;

    readonly transcript = TranscriptClass.Unspecified;
    readonly lnc_RNA = TranscriptClass.NonProteinCoding;
    readonly mRNA = TranscriptClass.ProteinCoding;
    readonly pseudogenic_transcript = TranscriptClass.Unspecified;
    readonly miRNA = TranscriptClass.NonProteinCoding;
    readonly ncRNA = TranscriptClass.NonProteinCoding;
    readonly rRNA = TranscriptClass.NonProteinCoding;
    readonly scRNA = TranscriptClass.NonProteinCoding;
    readonly snoRNA = TranscriptClass.NonProteinCoding;
    readonly snRNA = TranscriptClass.NonProteinCoding;
}

/**
 * Lookup table from Sequence Ontology transcript-component term names to
 * `TranscriptComponentClass`.
 *
 * Use the shared `SoTranscriptComponentClass.instance` and index it with a
 * term name; a term that is not declared here is typed as `undefined` by the
 * index signature.
 *
 * NOTE(review): at runtime an index lookup also reaches members inherited
 * from `Object.prototype` (e.g. `'constructor'`), which are not `undefined` —
 * confirm callers never pass such strings as a term name.
 */
export class SoTranscriptComponentClass {
    /** Single shared instance used for lookups. */
    static readonly instance = new SoTranscriptComponentClass();

    /** Any string may be used as a key; unknown terms resolve to `undefined`. */
    [key: string]: undefined | TranscriptComponentClass;

    readonly CDS = TranscriptComponentClass.ProteinCodingSequence;
    readonly exon = TranscriptComponentClass.Exon;
    readonly five_prime_UTR = TranscriptComponentClass.Untranslated;
    readonly three_prime_UTR = TranscriptComponentClass.Untranslated;
}
9 changes: 4 additions & 5 deletions tools/genome-preprocess/src/vcf/Convert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@ export function vcfConvert(inputFilePath: string, outputDirectory: string): Prom
});
}

// @! temporary for demo
let species = 'l_fortunei';
saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${species.toLowerCase()}.vvariants-dir/${species.toLowerCase()}`);
let inputFilename = path.basename(inputFilePath);
saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${inputFilename.toLowerCase()}.vvariants-dir/${inputFilename.toLowerCase()}`);

// @! temporary, save out genes for biobureau demo
filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf));
// filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf));

resolve(filesWritten);
}
Expand Down Expand Up @@ -130,7 +129,7 @@ function biobureauGenerateGenes(inputFilePath: string, outputDirectory: string,
throw `Biobureau demo: filename does not match (@! remove this)`;
}

let filesWritten = new Set();
let filesWritten = new Set<string>();

let biobureauGeneTileset = new AnnotationTileset(
lodLevel0TileSize, // ~1 million,
Expand Down