From 96e1a781fe1580c60cd920ba1ae9c93868bde6a6 Mon Sep 17 00:00:00 2001 From: Thomas Parisot Date: Mon, 30 Sep 2024 16:21:24 +0200 Subject: [PATCH] feat(export): supprime l'utilisation de Pandoc pour se baser sur stylo-export --- README.md | 3 +- export/dockerfile | 47 +-- export/src/assets/preview.css | 611 ++++++++++++++++++++++++++++++++++ export/src/export.js | 144 +++----- export/vendors/git-diff.py | 70 ---- 5 files changed, 665 insertions(+), 210 deletions(-) create mode 100644 export/src/assets/preview.css delete mode 100644 export/vendors/git-diff.py diff --git a/README.md b/README.md index 4ab4ca53d..a42405ba7 100644 --- a/README.md +++ b/README.md @@ -19,14 +19,13 @@ Plus d'informations sur [la documentation](http://stylo-doc.ecrituresnumeriques. - Node.js v18+ - MongoDB -- (optionnel) Pandoc, pour le [service d'export](./export) ## Sous MacOS ```bash brew tap mongodb/brew -brew install pandoc mongodb-community nvm +brew install mongodb-community nvm brew install --cask docker nvm install v18 --default diff --git a/export/dockerfile b/export/dockerfile index 78ba39891..1fed22917 100644 --- a/export/dockerfile +++ b/export/dockerfile @@ -1,49 +1,4 @@ -FROM node:18-bullseye - -WORKDIR /usr/src/app - -# install graphviz -RUN export DEBIAN_FRONTEND=noninteractive \ - && apt-get update -y \ - && apt-get upgrade -y \ - && apt-get install -y \ - abcm2ps \ - ca-certificates \ - cm-super \ - curl \ - fontconfig \ - fonts-liberation \ - git \ - graphviz \ - imagemagick \ - inotify-tools \ - make \ - python3-pygraphviz \ - python3 \ - wget \ - && apt-get clean -y \ - && rm -rf /var/lib/apt/lists/* - -#install pandoc -ENV PKGREL 1 -ENV VERSION 2.12 -ARG TARGETARCH=amd64 -ADD https://github.com/jgm/pandoc/releases/download/${VERSION}/pandoc-${VERSION}-${PKGREL}-${TARGETARCH}.deb /pandoc.deb -RUN export DEBIAN_FRONTEND=noninteractive \ - && dpkg -i /pandoc.deb \ - && rm /pandoc.deb - - RUN git clone --single-branch --branch 1.4.3 https://github.com/jgm/pandocfilters.git /pandocfilters \ - && cd /pandocfilters \ - && python3 setup.py install \ - && cp examples/*.py /usr/bin \ - && ls examples/*.py > /installed-pandocfilters.txt \ - && rm -rf /pandocfilters - -ADD vendors/git-diff.py /usr/bin/git-diff.py -RUN echo "examples/git-diff.py" >> /installed-pandocfilters.txt - -RUN sed -i 's#examples#/usr/bin#' /installed-pandocfilters.txt +FROM node:18-alpine # Install app dependencies # A wildcard is used to ensure both package.json AND package-lock.json are copied diff --git a/export/src/assets/preview.css b/export/src/assets/preview.css new file mode 100644 index 000000000..b637111ef --- /dev/null +++ b/export/src/assets/preview.css @@ -0,0 +1,611 @@ +/* +* I add this to html files generated with pandoc. +*/ + +@media only screen and (max-width: 1000px) { + nav { + display: none; + } +} + +html { + font-size: 100%; + overflow-y: scroll; + -webkit-text-size-adjust: 100%; + -ms-text-size-adjust: 100%; +} + +body { + color: #444; + font-family: Georgia, Palatino, 'Palatino Linotype', Times, 'Times New Roman', serif; + font-size: 12px; + line-height: 1.7; + padding: 1em; + margin: auto; + max-width: 42em; + background: #fefefe; +} + +a { + color: #0645ad; + text-decoration: none; +} + +a:visited { + color: #0b0080; +} + +a:hover { + color: #06e; +} + +a:active { + color: #faa700; +} + +a:focus { + outline: thin dotted; +} + +*::-moz-selection { + background: rgba(255, 255, 0, 0.3); + color: #000; +} + +*::selection { + background: rgba(255, 255, 0, 0.3); + color: #000; +} + +a::-moz-selection { + background: rgba(255, 255, 0, 0.3); + color: #0645ad; +} + +a::selection { + background: rgba(255, 255, 0, 0.3); + color: #0645ad; +} + +p { + margin: 1em 0; +} + +img { + max-width: 100%; +} + +h1, h2, h3, h4, h5, h6 { + color: #111; + line-height: 125%; + margin-top: 2em; + font-weight: normal; +} + +h4, h5, h6 { + font-weight: bold; +} + +h1 { + font-size: 2.5em; +} + +h2 { + font-size: 2em; +} + +h3 { + font-size: 1.5em; +} + +h4 { + font-size: 1.2em; +} + +h5 { + font-size: 1em; +} + +h6 { + font-size: 0.9em; +} + +blockquote { + color: #666666; + margin: 0; + padding-left: 3em; + border-left: 0.5em #EEE solid; +} + +hr { + display: block; + height: 2px; + border: 0; + border-top: 1px solid #aaa; + border-bottom: 1px solid #eee; + margin: 1em 0; + padding: 0; +} + +pre, code, kbd, samp { + color: #000; + font-family: monospace, monospace; + _font-family: 'courier new', monospace; + font-size: 0.98em; +} + +pre { + white-space: pre; + white-space: pre-wrap; + word-wrap: break-word; +} + +b, strong { + font-weight: bold; +} + +dfn { + font-style: italic; +} + +ins { + background: #ff9; + color: #000; + text-decoration: none; +} + +mark { + background: #ff0; + color: #000; + font-style: italic; + font-weight: bold; +} + +sub, sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} + +sup { + top: -0.5em; +} + +sub { + bottom: -0.25em; +} + +ul, ol { + margin: 1em 0; + padding: 0 0 0 2em; +} + +li p:last-child { + margin-bottom: 0; +} + +ul ul, ol ol { + margin: .3em 0; +} + +dl { + margin-bottom: 1em; +} + +dt { + font-weight: bold; + margin-bottom: .8em; +} + +dd { + margin: 0 0 .8em 2em; +} + +dd:last-child { + margin-bottom: 0; +} + +img { + border: 0; + -ms-interpolation-mode: bicubic; + vertical-align: middle; +} + +figure { + display: block; + text-align: center; + margin: 1em 0; +} + +figure img { + border: none; + margin: 0 auto; +} + +figcaption { + font-size: 0.8em; + font-style: italic; + margin: 0 0 .8em; +} + +table { + margin-bottom: 2em; + border-bottom: 1px solid #ddd; + border-right: 1px solid #ddd; + border-spacing: 0; + border-collapse: collapse; +} + +table th { + padding: .2em 1em; + background-color: #eee; + border-top: 1px solid #ddd; + border-left: 1px solid #ddd; +} + +table td { + padding: .2em 1em; + border-top: 1px solid #ddd; + border-left: 1px solid #ddd; + vertical-align: top; +} + +.author { + font-size: 1.2em; + text-align: center; +} + +@media only screen and (min-width: 480px) { + body { + font-size: 14px; + } +} +@media only screen and (min-width: 768px) { + body { + font-size: 16px; + } +} +@media print { + * { + background: transparent !important; + color: black !important; + filter: none !important; + -ms-filter: none !important; + } + + body { + font-size: 12pt; + max-width: 100%; + } + + a, a:visited { + text-decoration: underline; + } + + hr { + height: 1px; + border: 0; + border-bottom: 1px solid black; + } + + a[href]:after { + content: " (" attr(href) ")"; + } + + abbr[title]:after { + content: " (" attr(title) ")"; + } + + .ir a:after, a[href^="javascript:"]:after, a[href^="#"]:after { + content: ""; + } + + pre, blockquote { + border: 1px solid #999; + padding-right: 1em; + page-break-inside: avoid; + } + + tr, img { + page-break-inside: avoid; + } + + img { + max-width: 100% !important; + } + + @page :left { + margin: 15mm 20mm 15mm 10mm; +} + + @page :right { + margin: 15mm 10mm 15mm 20mm; +} + + p, h2, h3 { + orphans: 3; + widows: 3; + } + + h2, h3 { + page-break-after: avoid; + } +} + +nav::before { + content: 'Table des matières'; + font-weight: bold; + padding-left: 2em; +} + +nav { +position: fixed; +top: 0; +left: 0; +font-weight: 500; +padding-top: 3em; +width: 20%; +overflow-y: auto; +height: 100%; +} + +nav li { + list-style-type: none; + padding-top: 0.8em; + line-height: 1.3em; + +} + +header { + border: 1px solid rgb(200,200,200); + padding:10px; + background-color: rgb(240,240,240); + +} + +#schema-scholarly-article > span[property=name]:nth-child(1){ + font-size: xx-large; + line-height: 2em; + text-align: center; +} + +#schema-scholarly-article > span[property=name]:nth-child(3){ + font-size: larger; + line-height: 2em; + text-align: center; +} + + +#schema-scholarly-article > span[property=author]{ + font-size: large; + padding-top: 20px; + line-height: 2em; +} + +div.resume { + margin-top: 10px; +} + +div.resume[lang=en]::before { + content:"Abstract: "; + font-weight: bold; + } + +div.resume[lang=fr]::before { + content:"Résumé : "; + font-weight: bold; + } + +div.resume[lang=es]::before { + content:"Resumen: "; + font-weight: bold; + } + +div.resume[lang=pt]::before { + content:"Abstrato: "; + font-weight: bold; + } + +div.resume[lang=uk]::before { + content:"Pеферат: "; + font-weight: bold; + } + +div.resume[lang=de]::before { + content:"Abstrakt: "; + font-weight: bold; + } + +div.resume[lang=it]::before { + content:"Astratto: "; + font-weight: bold; + } + +.keywords::before { + content:"Catégories :"; + font-weight: bold; + } + +div.keywords { + margin-top: 10px; +} + +.keywords > div { + display: inline-block; +} + +.keywords > div::after { + content: " / "; +} + +.keywords > div:last-child::after { + content: "."; +} + +div.authorKeywords_fr { + margin-top: 10px; +} + +div.authorKeywords_fr span::before { + content: 'Mots-clés auteur : '; + font-weight: bold; +} + +div.authorKeywords_fr span::after { + content: '.'; +} + +div.authorKeywords_en span::before { + content: 'Keywords: '; + font-weight: bold; +} + +div.authorKeywords_en span::after { + content: '.'; +} + +hr#startArticle { + margin-top: 4em; + height: 1px; + border-top : 1px solid rgb(200,200,200) +} + +p span.epigraphe { + margin-left: 10%; + margin-right: 20%; + text-align: left; + margin-bottom: 2em; + float: right; + font-style: italic; +} + +p span.epigraphe span.source { + content: '— '; +} + +p span.dedicace { + margin-left: 10%; + text-align: left; + margin-bottom: 2em; + float: right; + font-style: italic; +} + +p span.note { + margin-left: 10%; + text-align: left; + margin-bottom: 2em; + float: right; + font-style: italic; +} + +span.these { + text-decoration: underline double #a91e58; +} + +span.these:hover::before { + content: "[These: "; + position:relative; + color: #a91e58; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.these:hover::after { + content: "]"; + position:relative; + color: #a91e58; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.exemple { + text-decoration: underline double #b37114; +} + +span.exemple:hover::before { + content: "[Exemple: "; + position:relative; + color: #b37114; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.exemple:hover::after { + content: "]"; + position:relative; + color: #b37114; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.concept { + text-decoration: underline double #14b371; +} + +span.concept:hover::before { + content: "[Concept: "; + position:relative; + color: #14b371; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.concept:hover::after { + content: "]"; + position:relative; + color: #14b371; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.definition { + text-decoration: underline double #1456b3; +} + +span.definition:hover::before { + content: "[Definition: "; + position:relative; + color: #1456b3; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.definition:hover::after { + content: "]"; + position:relative; + color: #1456b3; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.question { + text-decoration: underline double #ff7214 ; +} + +span.question:hover::before { + content: "[Question: "; + position:relative; + color: #ff7214; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} + +span.question:hover::after { + content: "]"; + position:relative; + color: #ff7214; + font-size: 1em; + padding-left:5px; + padding-right:5px; +} diff --git a/export/src/export.js b/export/src/export.js index 01c785e4c..faa59d0d2 100644 --- a/export/src/export.js +++ b/export/src/export.js @@ -1,18 +1,14 @@ -const fs = require('node:fs').promises -const path = require('node:path') -const os = require('node:os') -const util = require('node:util') -const exec = util.promisify(require('node:child_process').exec) - const config = require('./config.js') const archiver = require('archiver') - +const { readFile } = require('node:fs/promises') +const { join } = require('node:path') const { logger } = require('./logger') const { FindByIdNotFoundError } = require('./helpers/errors') const { normalize } = require('./helpers/filename') const { getArticleById, getVersionById, getCorpusById } = require('./graphql') const canonicalBaseUrl = config.get('export.canonicalBaseUrl') +const exportEndpoint = config.get('export.urlEndpoint') const exportZip = async ({ bib, yaml, md, id, versionId, title }, res, _) => { const filename = `${normalize(title)}.zip` @@ -25,101 +21,65 @@ const exportZip = async ({ bib, yaml, md, id, versionId, title }, res, _) => { return archive.finalize() } -function generatePandocCommand ( - preview, - markdownFilePath, - bibliographyFilePath, - metadataFilePath -) { - const templatesDirPath = path.join(__dirname, 'templates') - let templateArg = `--template=${path.join( - templatesDirPath, - 'publish.html' - )}` - if (preview) { - templateArg = `--template=${path.join( - templatesDirPath, - 'preview.html' - )} --include-in-header=${path.join(templatesDirPath, 'preview-styles.html')}` - } - const cslFilePath = path.join(templatesDirPath, 'chicagomodified.csl') - // https://github.com/jgm/pandoc/blob/main/MANUAL.txt - // `pandoc` [*options*] [*input-file*]... - return `pandoc \ ---metadata-file=${metadataFilePath} \ ---bibliography=${bibliographyFilePath} \ ---standalone \ -${templateArg} \ ---section-divs \ ---ascii \ ---toc \ ---csl=${cslFilePath} \ ---citeproc \ ---from=markdown \ --to=html5 \ -${markdownFilePath}` +/** + * + * @param {{md_content: String, bib_content: String, yaml_content: String, bibliography_style: String, with_toc: Boolean}} bodyOptions + * @returns {Promise} + */ +async function getStyloExportHtmlOutput (bodyOptions) { + const body = new FormData() + Object.entries(bodyOptions).forEach(([key, value]) => body.append(key, value)) + + return fetch(`${exportEndpoint}/api/article_preview`, { + method: 'POST', + body + }).then(response => response.text()) } const exportHtml = async ({ bib, yaml, md, id, versionId, title }, res, req) => { - const preview = req.query.preview + const preview = Boolean(req.query.preview) const originalUrl = req.originalUrl - let tmpDirectory - try { - tmpDirectory = await fs.mkdtemp(path.join(os.tmpdir(), 'stylo-')) - - // write files into the temporary directory - const markdownFilePath = path.join(tmpDirectory, `${id}.md`) - const bibliographyFilePath = path.join(tmpDirectory, `${id}.bib`) - const metadataFilePath = path.join(tmpDirectory, `${id}.yaml`) - - await Promise.all([ - fs.writeFile(markdownFilePath, md, 'utf8'), - fs.writeFile(bibliographyFilePath, bib, 'utf8'), - fs.writeFile(metadataFilePath, yaml, 'utf8'), - ]) - - // pandoc command - const pandocCommand = generatePandocCommand( - preview, - markdownFilePath, - bibliographyFilePath, - metadataFilePath + let html5 = await getStyloExportHtmlOutput({ + md_content: md, + bib_content: bib, + yaml_content: yaml, + with_toc: preview, + bibliography_style: 'chicagomodified' + }) + + if (canonicalBaseUrl && !html5.includes('\s?)/gs, + `$1` ) - const FIFTEEN_MEGABYTES = 15 * 1024 * 1024 - const { stdout, stderr } = await exec(pandocCommand, { maxBuffer: FIFTEEN_MEGABYTES }) - if (stderr) { - logger.warn(stderr) - } - let html5 = stdout - if (canonicalBaseUrl && !html5.includes('\s?)/gs, - `$1` - ) - } + } - if (preview) { - html5 = html5.replace(/<\/body>/, () => { - return ` - - ` - }) - } else { - res.attachment(`${normalize(title)}.html`) - } + /** + * HTML Preview can be both for an Article export and the Article Preview (read: proofread with Hypothesis annotations) + * The `preview` argument controls in which context we display the output + */ + if (preview) { + const previewStylesheet = await readFile(join(__dirname, 'assets', 'preview.css'), { encoding: 'utf8' }) - res.send(html5) - } finally { - if (tmpDirectory) { - await fs.rm(tmpDirectory, { recursive: true, maxRetries: 3 }) + html5 = html5.replace(/(<\/head>\s?)/gs, ` + + $1` + ) + + html5 = html5.replace(/<\/body>/, ` + + `) + + return res.send(html5) } + + res.attachment(`${normalize(title)}.html`) } const getArticleExportContext = async (articleId) => { diff --git a/export/vendors/git-diff.py b/export/vendors/git-diff.py deleted file mode 100644 index 6492a0d4b..000000000 --- a/export/vendors/git-diff.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 - -""" - Pandoc filter to process a code block with class "git-diff" to present the - diff of a file between two commits. - - class variables: - * dir: Directory from (optional, defaults to ".") - * objects: Which object in `dir` (optional defaults to ".") - * commit-range: commitrange (like: "HEAD..v1.0", optional, defaults to HEAD) - * diffoptions: git diff options (like: -U0, optional, defaults to "") - - Example: (git -C /devel/jog diff ..23fe4 -- README.md) - ```{.git-diff commit-range="..23fe4" dir="/devel/jog" object="README.md"} - - Example: (git diff ..v1.0) - ```{.git-diff commit-range="..v1.0"} - - Example: - ```{.git-diff commit-range="..v1.0" objects="README.md src/u src/c" diffoptions="-U0"} - """ - -from pandocfilters import toJSONFilter, RawBlock, CodeBlock -import subprocess; - -def gitdiff(key, value, format, meta): - if key == "CodeBlock": - [[ident, classes, keyvals], contents] = value - if "git-diff" in classes: - folder = "." - commra = "" - obj = "" - objdiv = "" - options = "" - - # its a diff - view - if not "diff" in classes: - classes.append("diff") - - for el in keyvals: - if "commit-range" in el: - commra = el[1] - - if "dir" in el: - folder = el[1] - - if "objects" in el or "object" in el: - objdiv = "--" - obj = "%s %s" % (obj, el[1]) - - if "diffoptions" == el[0]: - options = el[1] - - command_string = "git -C %s diff %s %s %s %s" % (folder, options, commra, objdiv, obj) - out = "" - try: - l = list(filter(None, command_string.split(" "))) - out = subprocess.check_output(l) - except subprocess.CalledProcessError as err: - return None - - if out != None or out != "": - out = out.decode("utf-8"); - return [CodeBlock([ident, classes, keyvals], out), CodeBlock([ident, classes, keyvals], contents)] - else: - return None - -if __name__ == "__main__": - toJSONFilter(gitdiff) -