Skip to content

Commit

Permalink
Updates the data by default daily
Browse files Browse the repository at this point in the history
  • Loading branch information
scourgemancer committed Apr 6, 2020
1 parent 238872e commit 9448a2f
Show file tree
Hide file tree
Showing 10 changed files with 459 additions and 35 deletions.
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ services:
environment:
GENES_FILE: "/ifad/files/gene-types.txt"
ANNOTATIONS_FILE: "/ifad/files/tair.gaf"
UPDATE_INTERVAL: 86400000
# Quoted on purpose: Compose rejects bare YAML booleans as environment values,
# and the application compares this variable against the string "true".
UPDATE_AT_MIDNIGHT: "true"
ports:
- 80:3000
volumes:
Expand Down
125 changes: 125 additions & 0 deletions src/__tests__/data-download/february/tair.gaf

Large diffs are not rendered by default.

Binary file added src/__tests__/data-download/february/tair.gaf.gz
Binary file not shown.
36 changes: 36 additions & 0 deletions src/__tests__/data-download/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import express from "express";
import { StructuredData } from "../../ingest";
import { getDataset } from "../../data_fetcher";

/**
 * Starts an Express app on port 8080 that statically serves `filename` under
 * the `/annotations` route.
 *
 * @param filename Path handed to `express.static`.
 * @param callback Optional hook invoked once the server is listening.
 * @returns The value returned by `app.listen`, which callers use to close the server.
 */
const serveFile = (filename: string, callback?: () => any) => {
  const fixtureApp = express();
  fixtureApp.use("/annotations", express.static(filename));

  const announce = () => {
    console.log(`Serving file ${filename}`);
    if (!callback) {
      return;
    }
    callback();
  };

  return fixtureApp.listen(8080, announce);
};

/**
 * End-to-end check that the dataset exposed by `getDataset` changes after the
 * file served at FILE_URL is swapped from the January to the February fixture.
 *
 * The test is callback-driven (`done`) only: the test function is not `async`,
 * because a test must not both take `done` and return a promise.
 */
test("data updates when files on the server change", (done) => {
  const path = require("path");
  process.env["UPDATE_INTERVAL"] = "5000";
  process.env["UPDATE_AT_MIDNIGHT"] = "false";
  process.env["SERVER_LIFETIME_LENGTH"] = "20000";
  process.env["FILE_URL"] = "http://localhost:8080/annotations/tair.gaf.gz";

  let februaryServer: ReturnType<typeof serveFile> | undefined;

  const januaryServer = serveFile(path.join(__dirname + "/january"), () => {
    // Importing the backend runs its start-up code; only the side effect is needed.
    require(path.join(__dirname + "/../../index.ts"));
    const januaryData: StructuredData = getDataset();

    // After 10 s, replace the January fixture server with the February one.
    setTimeout(() => {
      januaryServer.close();
      februaryServer = serveFile(path.join(__dirname + "/february"));
    }, 10000);

    // NOTE(review): with a 5000 ms update interval, the check at 12000 ms
    // leaves little room after the swap at 10000 ms - confirm the margin.
    setTimeout(() => {
      try {
        const februaryData: StructuredData = getDataset();
        expect(januaryData).not.toEqual(februaryData);
        done();
      } catch (error) {
        // Report the assertion failure instead of letting the test time out.
        done(error);
      } finally {
        // Do not leave a listening socket behind once the test has finished.
        if (februaryServer) {
          februaryServer.close();
        }
      }
    }, 12000);
  });
}, 30000);
125 changes: 125 additions & 0 deletions src/__tests__/data-download/january/tair.gaf

Large diffs are not rendered by default.

Binary file added src/__tests__/data-download/january/tair.gaf.gz
Binary file not shown.
71 changes: 71 additions & 0 deletions src/__tests__/test_data_fetcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import express from "express";
import { StructuredData } from "../ingest";
import {startPeriodicallyCalling, getDataset} from "../data_fetcher";

/**
 * Timing tests for `startPeriodicallyCalling`.
 *
 * Each test awaits real timers before asserting, so the expectations run
 * while the test is still active rather than after it has already returned.
 */
describe("Periodically calling functions", () => {
  /** Resolves after `ms` milliseconds. */
  const wait = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

  let previousMidnightFlag: string | undefined;

  beforeAll(() => {
    // UPDATE_AT_MIDNIGHT defaults to "true", which makes startPeriodicallyCalling
    // replace the supplied start date with the next midnight.
    previousMidnightFlag = process.env["UPDATE_AT_MIDNIGHT"];
    process.env["UPDATE_AT_MIDNIGHT"] = "false";
  });

  afterAll(() => {
    if (previousMidnightFlag === undefined) {
      delete process.env["UPDATE_AT_MIDNIGHT"];
    } else {
      process.env["UPDATE_AT_MIDNIGHT"] = previousMidnightFlag;
    }
  });

  it("should call once for each period", async () => {
    const foo = jest.fn();
    // The lifetime stops the interval after the third call so it does not leak.
    startPeriodicallyCalling(foo, 300, new Date(), 1000);
    await wait(1000);
    expect(foo).toHaveBeenCalledTimes(3);
  });

  it("should only start calling after the start date", async () => {
    const bar = jest.fn();
    const startDate = new Date();
    startDate.setMilliseconds(startDate.getMilliseconds() + 500);
    // The lifetime stops the interval after the second call so it does not leak.
    startPeriodicallyCalling(bar, 200, startDate, 550);
    await wait(400);
    expect(bar).toHaveBeenCalledTimes(0);
    await wait(600);
    expect(bar).toHaveBeenCalledTimes(2);
  });

  it("should stop running when a lifetime is provided", async () => {
    const foobar = jest.fn();
    startPeriodicallyCalling(foobar, 200, new Date(), 300);
    await wait(500);
    expect(foobar).toHaveBeenCalledTimes(1);
  });
});

/**
 * Checks that `getDataset` returns different data once the file behind
 * FILE_URL changes from the January fixture to the February fixture.
 *
 * Every wait is a promise that actually resolves, so the test runs to its
 * assertion instead of hanging until the Jest timeout.
 */
test("The data getter reflects updates when the server changes",
  async () => {
    const path = require("path");

    /** Resolves after `ms` milliseconds. */
    const wait = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

    /** Starts a fixture server and resolves with it once it is listening. */
    const listen = (directory: string) =>
      new Promise<ReturnType<typeof serveFile>>((resolve) => {
        const server = serveFile(directory, () => resolve(server));
      });

    process.env["UPDATE_INTERVAL"] = "5000";
    process.env["UPDATE_AT_MIDNIGHT"] = "false";
    process.env["SERVER_LIFETIME_LENGTH"] = "20000";
    process.env["FILE_URL"] = "http://localhost:8080/annotations/tair.gaf.gz";

    // The fixtures live in data-download/<month>/ next to this test file.
    const fixtures = path.join(__dirname, "data-download");

    let activeServer = await listen(path.join(fixtures, "january"));
    try {
      // Importing the backend runs its start-up code; only the side effect is needed.
      // NOTE(review): the backend fetches FILE_URL with spawnSync, which blocks
      // this process - confirm the in-process fixture server can answer it.
      require(path.join(__dirname + "/../../src/index.ts"));
      const januaryData: StructuredData = getDataset();

      // Swap the served fixture, then give the periodic update time to run.
      activeServer.close();
      activeServer = await listen(path.join(fixtures, "february"));
      await wait(12000);

      const februaryData: StructuredData = getDataset();
      expect(januaryData).not.toEqual(februaryData);
    } finally {
      // Always release port 8080, even when an assertion fails.
      activeServer.close();
    }
  }, 25000);

/**
 * Serves `filename` through `express.static` under `/annotations` on port 8080.
 *
 * @param filename Path handed to `express.static`.
 * @param callback Optional hook invoked once the server is listening.
 * @returns The value returned by `app.listen`, which callers use to close the server.
 */
const serveFile = (filename: string, callback?: () => any) => {
  const staticApp = express();
  staticApp.use("/annotations", express.static(filename));

  const onListening = () => {
    console.log(`Serving file ${filename}`);
    if (!callback) {
      return;
    }
    callback();
  };

  return staticApp.listen(8080, onListening);
};
73 changes: 73 additions & 0 deletions src/data_fetcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import {readFileSync} from "fs";
import {resolve} from "path";
import {ingestData, StructuredData, UnstructuredText} from "./ingest";

let mostRecentDataset: StructuredData;

/**
 * Returns the dataset most recently stored by `updateData`.
 * The value is `undefined` until `updateData` has completed at least once.
 */
export const getDataset = (): StructuredData => mostRecentDataset;

/**
 * Downloads the gzipped annotations file at `dataUrl` into the assets
 * directory and unpacks it as `tair.gaf`.
 *
 * Each tool is started with an argument array and a `cwd` instead of a single
 * "cd dir ; cmd" string: without a shell, spawnSync treats such a string as
 * the executable name and fails with ENOENT. Passing the URL as an argument
 * also keeps it out of any shell interpretation.
 *
 * On a failed download the previously unpacked `tair.gaf` is left untouched
 * and the failure is logged. The calls are synchronous and block the process
 * until the tools exit.
 *
 * @param dataUrl URL of the `.gz` archive to fetch.
 */
export const downloadData = (dataUrl: string) => {
  const { spawnSync } = require("child_process");
  const path = require("path");
  const assetsDir = path.join(__dirname + "/../assets/");
  const archiveName = "tair.gaf.gz";

  /** Runs one tool inside the assets directory; returns whether it succeeded. */
  const run = (command: string, args: string[]): boolean => {
    const result = spawnSync(command, args, { cwd: assetsDir });
    if (result.error || result.status !== 0) {
      const reason = result.error ? result.error.message : `exit code ${result.status}`;
      console.error(`${command} failed (${reason}) while updating data from ${dataUrl}`);
      return false;
    }
    return true;
  };

  // -O pins the archive name so the unpacked file is always tair.gaf,
  // whatever the last path segment of the URL happens to be.
  if (!run("wget", ["-q", "-O", archiveName, dataUrl])) {
    // wget -O can leave an empty or partial archive behind; discard it.
    run("rm", ["-f", archiveName]);
    return;
  }

  // -f overwrites the previous tair.gaf; gunzip removes the archive afterwards.
  run("gunzip", ["-f", archiveName]);
};

/**
 * Refreshes the in-memory dataset: downloads the annotations file (from
 * FILE_URL, or the Gene Ontology default), reads `assets/gene-types.txt` and
 * `assets/tair.gaf`, parses them with `ingestData`, and stores the result for
 * `getDataset`.
 *
 * @throws Error("failed to parse data") when `ingestData` returns a falsy value.
 */
export const updateData = () => {
  const sourceUrl = process.env["FILE_URL"] || "http://current.geneontology.org/annotations/tair.gaf.gz";
  downloadData(sourceUrl);

  console.log("Begin reading data");
  const readAsset = (relativePath: string): string =>
    readFileSync(resolve(relativePath)).toString();
  const unstructuredText: UnstructuredText = {
    genesText: readAsset("assets/gene-types.txt"),
    annotationsText: readAsset("assets/tair.gaf"),
  };

  const parsed = ingestData(unstructuredText);
  if (!parsed) {
    throw new Error("failed to parse data");
  }

  mostRecentDataset = parsed;
  console.log("Finished parsing data");
};

/**
 * Calls `fn` every `interval` milliseconds, beginning at a start date.
 *
 * When the UPDATE_AT_MIDNIGHT environment variable is "true" (its default when
 * unset), the start date is replaced by `getTomorrowMorning()`. Otherwise
 * `startDate` is used; a start date that is now or in the past begins the
 * interval immediately.
 *
 * The start is decided from the actual remaining time, so a start date later
 * in the current minute, or at the same clock time on another day, is waited
 * for rather than treated as "now".
 *
 * @param fn Function to invoke on every tick.
 * @param interval Milliseconds between calls (defaults to 24 hours).
 * @param startDate Earliest moment at which the interval may begin.
 * @param lifetime If provided and non-zero, milliseconds after the interval
 *   begins at which it is cleared.
 */
export const startPeriodicallyCalling = (
  fn: (...args: any[]) => void,
  interval: number = (1000 * 60 * 60 * 24),
  startDate: Date = new Date(),
  lifetime?: number
): void => {
  // Timer delays above this 32-bit limit are not honoured, so longer waits
  // are split into several timeouts.
  const MAX_TIMER_DELAY_MS = 2147483647;

  const updateAtMidnight = (process.env["UPDATE_AT_MIDNIGHT"] || "true") === "true";
  const firstRun: Date = updateAtMidnight ? getTomorrowMorning() : startDate;

  /** Starts the repeating timer and, if requested, schedules its shutdown. */
  const beginRepeating = (): void => {
    const timer = setInterval(fn, interval);
    if (lifetime) {
      setTimeout(() => clearInterval(timer), lifetime);
    }
  };

  /** Begins now if the start has been reached; otherwise re-arms itself. */
  const waitForStart = (): void => {
    const remaining = firstRun.getTime() - Date.now();
    if (remaining <= 0) {
      beginRepeating();
      return;
    }
    setTimeout(waitForStart, Math.min(remaining, MAX_TIMER_DELAY_MS));
  };

  waitForStart();
};

/**
 * Returns the start of the next calendar day in local time.
 *
 * @returns A new Date set to tomorrow at exactly 00:00:00.000.
 */
export const getTomorrowMorning = (): Date => {
  const tomorrowMorning = new Date();
  tomorrowMorning.setDate(tomorrowMorning.getDate() + 1);
  // Clear hours, minutes, seconds and milliseconds in a single call so the
  // result is exactly midnight rather than midnight plus leftover milliseconds.
  tomorrowMorning.setHours(0, 0, 0, 0);
  return tomorrowMorning;
};

/**
 * Reports whether two dates share the same local hour and minute.
 *
 * Only the hour and minute fields are compared: the calendar date, seconds
 * and milliseconds are ignored, so dates on different days can match.
 *
 * @param time1 First date to compare.
 * @param time2 Second date to compare.
 * @returns `true` when both hours and both minutes are equal.
 */
const isSameTimeOfDay = (time1: Date, time2: Date): boolean => {
  return time1.getHours() === time2.getHours() && time1.getMinutes() === time2.getMinutes();
};
13 changes: 12 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ import {V1Service} from "./services/v1";
import {resolve} from "path";
import cors from "cors";
import compression from "compression";
import { updateData, startPeriodicallyCalling, getTomorrowMorning } from './data_fetcher';

// Load the dataset once before the periodic refresh is scheduled.
updateData();

const default_interval: string = (1000 * 60 * 60 * 24).toString();
const interval: string = process.env["UPDATE_INTERVAL"] || default_interval;
const update_at_midnight: string = process.env["UPDATE_AT_MIDNIGHT"] || 'true';

// Parse with an explicit radix and fall back to the default for anything that
// is not a positive number: a NaN delay would make the timer fire almost
// continuously instead of once per interval.
const parsedInterval = Number.parseInt(interval, 10);
const intervalIsValid = Number.isFinite(parsedInterval) && parsedInterval > 0;
if (!intervalIsValid) {
  console.warn(`Ignoring invalid UPDATE_INTERVAL "${interval}"; using ${default_interval} ms`);
}
const intervalMs = intervalIsValid ? parsedInterval : Number.parseInt(default_interval, 10);

if (update_at_midnight === "true") {
  startPeriodicallyCalling(updateData, intervalMs, getTomorrowMorning());
} else {
  startPeriodicallyCalling(updateData, intervalMs);
}

const app = express();
app.use(cors());
Expand All @@ -16,4 +27,4 @@ app.use("/app/", express.static(process.env["FRONTEND_PUBLIC_PATH"] || resolve("
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
console.log(`🚢 Now listening on 0.0.0.0:${PORT} 🔥`);
});
});
49 changes: 15 additions & 34 deletions src/services/v1.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,10 @@
import {readFileSync} from "fs";
import {resolve} from "path";
import {Errors, GET, Path, QueryParam, Return, ContextResponse} from "typescript-rest";
import {GeneProductTypeFilter, Query, queryDataset, QueryOption, Segment, Strategy} from "../queries";
import {
AnnotationStatus,
Aspect,
ingestData,
makeAnnotationIndex,
StructuredData,
UnstructuredText
} from "../ingest";
import {AnnotationStatus, Aspect, makeAnnotationIndex} from "../ingest";
import {QueryOption, Segment, Strategy} from "../queries";
import {annotationsToGAF, genesToCSV, buildGenesMetadata, buildAnnotationMetadata} from '../export';
import { getDataset } from '../data_fetcher';
import express from "express";

// TODO use data fetcher rather than files.
console.log("Begin reading data");
const genesText = readFileSync(process.env["GENES_FILE"] || resolve("assets/gene-types.txt")).toString();
const annotationsText = readFileSync(process.env["ANNOTATIONS_FILE"] || resolve("assets/tair.gaf")).toString();
const unstructuredText: UnstructuredText = {genesText, annotationsText};
const maybeDataset = ingestData(unstructuredText);
if (!maybeDataset) throw new Error("failed to parse data");
const dataset: StructuredData = maybeDataset;
console.log("Finished parsing data");

type Format = "gaf" | "gene-csv" | "json";

type QueryStatus = "EXP" | "OTHER" | "UNKNOWN" | "UNANNOTATED";
Expand Down Expand Up @@ -81,8 +63,15 @@ export class V1Service {
segments_meta.strategy = strategy;
}

const query: Query = { filter, option: option };
const queriedDataset = queryDataset(dataset, query);
const dataset = getDataset();

// TODO include unannotated genes
const queriedDataset = queryAnnotated(dataset, query);

// TODO include unannotated genes
const format = validateFormat(maybeFormat);

const filters_meta = {filters: segments.map(f=>`${f.aspect}-${f.annotationStatus}`).join(", ")};

switch (format) {
case "gaf":
Expand Down Expand Up @@ -111,17 +100,9 @@ export class V1Service {

@Path("/wgs_segments")
@GET
get_wgs(
/**
* ?filter=""
* This filter describes which subset of Genes will be used for querying.
* The option for filter are "all" | "include_protein" | "exclude_pseudogene".
*/
@QueryParam("filter") maybeFilter: string = "exclude_pseudogene",
) {
const filter = validateFilter(maybeFilter);
const query: Query = { filter, option: {tag: "QueryGetAll"} };
let queryResult = queryDataset(dataset, query);
get_wgs() {
const dataset = getDataset();
const totalGeneCount = Object.keys(dataset.genes.index).length;

const totalGeneCount = Object.keys(queryResult.genes.index).length;
const result = Object.entries(queryResult.annotations.index)
Expand Down

0 comments on commit 9448a2f

Please sign in to comment.