-
Notifications
You must be signed in to change notification settings - Fork 3
/
extract_english_json.ts
100 lines (89 loc) · 3.98 KB
/
extract_english_json.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import * as fs from "fs";
import * as path from "path";
import { parse, j2xParser as JS2XMLParser } from "fast-xml-parser";
import * as AdmZip from "adm-zip";
import * as rimraf from "rimraf";
import { extractStoryMap, getStoriesForSpread, removeForbiddenCharacters, getSpreadIdsInOrder, pageFileNameForSpreadId, TranslationEntry, getIDMLFilePathForName, extractStoryPSRList, psrListToHTML } from "./shared_functions";
import { exit } from "process";
/** Folder scanned for IDML documents; every sub-directory name is treated as one document. */
const inputFolder = "./input";
/** Root folder that receives the extracted translation JSON, one sub-folder per document. */
const translateJSONFolder = "./translate_json";
/** Scratch folder used for unpacked IDML archives; wiped and recreated on every run. */
const tempFolder = "./temp";

// Entry point: clear the scratch folder, recreate it, then extract the English
// text of every document directory found under the input folder.
rimraf(tempFolder, (err) => {
  if (err) {
    // Do not carry on after a failed cleanup: the stale scratch folder is most
    // likely still on disk, so recreating it below would throw, and logging
    // "Removed old temp directory" would be misleading.
    console.error("Error removing temp directory", err);
    process.exitCode = 1;
    return;
  }
  console.log("Removed old temp directory");
  fs.mkdirSync(tempFolder);

  for (const idmlName of fs.readdirSync(inputFolder)) {
    const inputSubPath = path.join(inputFolder, idmlName);
    // Only directories are processed; loose files in the input folder are ignored.
    if (fs.statSync(inputSubPath).isDirectory()) {
      extractEnglishJSON(idmlName);
    }
  }
});
/**
 * Extracts the English text of one IDML document into per-spread JSON files.
 *
 * The IDML archive located via `getIDMLFilePathForName` is unpacked into
 * `<tempFolder>/<idmlName>/<idmlName>`. For every file in the unpacked
 * `Spreads` folder, the stories referenced by that spread are read from the
 * `Stories` folder and converted into `TranslationEntry` items, which are
 * written as pretty-printed JSON to `<translateJSONFolder>/<idmlName>/en/`
 * under the file name returned by `pageFileNameForSpreadId`.
 *
 * A story that contains at least one item of type "hyperlink" becomes a single
 * "html" entry built with `psrListToHTML`; any other story yields one "text"
 * entry per item of its PSR list.
 *
 * @param idmlName Name of the document's sub-folder inside the input folder.
 *   If no IDML file is found for it, a warning is logged and nothing is written.
 */
function extractEnglishJSON(idmlName: string): void {
  // Validate the input first so a missing IDML file leaves no empty scratch folder behind.
  const inputFilePath = getIDMLFilePathForName(inputFolder, idmlName);
  if (inputFilePath === null) {
    console.warn("Could not find IDML file for ", idmlName);
    return;
  }
  console.log("Extracting English text from " + inputFilePath);

  // Unpack the archive. `recursive` creates the intermediate
  // <tempFolder>/<idmlName> level and tolerates directories that already exist.
  const tempEnPath = path.join(tempFolder, idmlName, idmlName);
  fs.mkdirSync(tempEnPath, { recursive: true });
  new AdmZip(inputFilePath).extractAllTo(tempEnPath);

  // Create the whole output path in one go so a missing translate-JSON root
  // folder no longer makes the run fail with ENOENT.
  const englishJSONPath = path.join(translateJSONFolder, idmlName, "en");
  fs.mkdirSync(englishJSONPath, { recursive: true });

  const spreadIdsInOrder = getSpreadIdsInOrder(tempEnPath);
  const spreadsPath = path.join(tempEnPath, "Spreads");
  const storiesPath = path.join(tempEnPath, "Stories");

  for (const spreadFile of fs.readdirSync(spreadsPath)) {
    // Spread files are named "Spread_<id>.xml"; strip the wrapper to get the id.
    const spreadId = spreadFile.replace("Spread_", "").replace(".xml", "");
    const spreadFileContents = fs
      .readFileSync(path.join(spreadsPath, spreadFile))
      .toString();
    const storyIds = getStoriesForSpread(spreadFileContents);

    const translateStructure: TranslationEntry[] = [];
    for (const storyId of storyIds) {
      const storyFilePath = path.join(storiesPath, `Story_${storyId}.xml`);
      const storyFileContents = fs.readFileSync(storyFilePath).toString();
      const psrList = extractStoryPSRList(storyFileContents);

      if (psrList.some((psr) => psr.type === "hyperlink")) {
        // Stories with links are kept together as one HTML entry.
        const cleanedHtml = removeForbiddenCharacters(psrListToHTML(psrList));
        translateStructure.push({
          sourceText: cleanedHtml,
          text: cleanedHtml,
          note: "",
          type: "html",
          storyId: storyId,
        });
      } else {
        // Link-free stories produce one plain-text entry per PSR item.
        for (const psr of psrList) {
          const cleanedText = removeForbiddenCharacters(psr.content);
          translateStructure.push({
            sourceText: cleanedText,
            text: cleanedText,
            note: "",
            type: "text",
            storyId: storyId,
          });
        }
      }
    }

    const pageFileName = pageFileNameForSpreadId(spreadIdsInOrder, spreadId);
    fs.writeFileSync(
      path.join(englishJSONPath, pageFileName),
      JSON.stringify(translateStructure, null, 4)
    );
  }
}