From 2005b064cae18606b186fd960e3897ace810c68e Mon Sep 17 00:00:00 2001 From: Guillaume Grossetie Date: Mon, 18 Nov 2024 16:31:35 +0100 Subject: [PATCH] feat: transforme du format legacy vers le nouveau format (et inversement) --- .../src/schemas/article-metadata.schema.json | 46 ++- .../article-uploaded-to-the-cloud-v0.json | 109 ++++++ .../article-uploaded-to-the-cloud-v1.json | 103 ++++++ graphql/helpers/metadata.js | 331 +++++++++++++++++- graphql/helpers/metadata.test.js | 31 +- 5 files changed, 598 insertions(+), 22 deletions(-) create mode 100644 graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v0.json create mode 100644 graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v1.json diff --git a/front/src/schemas/article-metadata.schema.json b/front/src/schemas/article-metadata.schema.json index f49d45a17..5ad49129e 100644 --- a/front/src/schemas/article-metadata.schema.json +++ b/front/src/schemas/article-metadata.schema.json @@ -109,12 +109,33 @@ "license": { "type": "string" }, + "acknowledgements": { + "type": "string" + }, "localizedContent": { "type": "array", "items": { "$ref": "#/definitions/localizedContent" } }, + "controlledKeywords": { + "type": "array", + "items": { + "type": "object", + "properties": { + "label": { + "type": "string" + }, + "idRameau": { + "type": "string" + }, + "uriRameau": { + "type": "string" + } + } + }, + "uniqueItems": true + }, "authors": { "type": "array", "items": { @@ -139,20 +160,17 @@ "$ref": "#/definitions/person" } }, - "translations": { - "type": "array", - "items": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "url": { - "type": "string" - }, - "lang": { - "$ref": "#/definitions/lang" - } + "translationOf": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "url": { + "type": "string" + }, + "lang": { + "$ref": "#/definitions/lang" } } }, diff --git a/graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v0.json 
b/graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v0.json new file mode 100644 index 000000000..19748dd08 --- /dev/null +++ b/graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v0.json @@ -0,0 +1,109 @@ +{ + "abstract": [ + { + "lang": "fr", + "text_f": "Cet article se penche sur l’appropriation par trois séries télévisées contemporaines des visions utopiques post-humanistes de la vie après la mort dans un univers virtuel grâce au « téléchargement de conscience », une technologie actuellement inexistante, mais faisant l’objet de nombreuses discussions et projections." + }, + { + "lang": "en", + "text_f": "This article examines how three contemporary television series appropriate the utopian post-humanist visions of life after death in a virtual universe made possible by “mind uploading”, a technology that does not currently exist but is the subject of much discussion and projection." + } + ], + "acknowledgements": "Merci à tous !", + "authors": [ + { + "affiliations": "aff", + "biography": "bio", + "email": "courriel", + "foaf": "foaf", + "forname": "Sylvaine", + "isni": "isni", + "orcid": "0000-0001-6424-3229", + "surname": "Bataille", + "viaf": "viaf", + "wikidata": "wikidata" + } + ], + "controlledKeywords": [ + { + "idRameau": "", + "label": "F# (langage de programmation)", + "uriRameau": "http://data.bnf.fr/ark:/12148/cb16191752f" + } + ], + "date": "2024-05-28", + "diffnum": "Sens Public Web Publishing", + "director": [ + { + "forname": "Marcello", + "surname": "Vitali-Rosati" + } + ], + "dossier": [ + { + "id": "1710", + "title_f": "L’œuvre numérique à son miroir : regards sur les créations digitales contemporaines" + } + ], + "funder": { + "funder_id": "", + "funder_name": "Université de Montréal" + }, + "id": "SP1711", + "issnnum": "2104-3272", + "issueDirectors": [ + { + "forname": "Tony", + "surname": "Gheeraert" + }, + { + "forname": "Mélanie", + "surname": "Lucciano" + }, + { + "forname": "Sandra", + "surname": "Provini" + } + ], + 
"journal": "Sens Public", + "journal_email": "redaction@sens-public.org", + "journal_issue": "2024.8", + "keywords": [ + { + "lang": "fr", + "list_f": [ + "Arts et lettres", + "Cinéma", + "Fiction" + ] + }, + { + "lang": "en", + "list_f": [ + "Cinema", + "Fiction", + "Narrative" + ] + } + ], + "lang": "fr", + "prod": "Sens Public Prod", + "prodnum": "Sens Public Web Prod", + "publisher": "Sens Public Publishing", + "reviewers": [ + { + "forname": "McCage", + "surname": "Griffiths" + } + ], + "rights": "CC BY-SA 4.0", + "subtitle_f": "Téléchargement de conscience, réflexivité et ré-enchantement de la technologie dans trois fictions sérielles anglophones", + "title_f": "Uploaded to the cloud… Sounds like heaven!", + "translator": [ + { + "forname": "Shaun", + "surname": "Nicks" + } + ], + "url_article": "https://sens-public.org/articles/1711/" +} diff --git a/graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v1.json b/graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v1.json new file mode 100644 index 000000000..9aad3fbce --- /dev/null +++ b/graphql/helpers/__fixtures__/article-uploaded-to-the-cloud-v1.json @@ -0,0 +1,103 @@ +{ + "type": "article", + "@version": "1.0", + "id": "SP1711", + "publicationDate": "2024-05-28", + "url": "https://sens-public.org/articles/1711/", + "license": "CC BY-SA 4.0", + "acknowledgements": "Merci à tous !", + "lang": "fr", + "title": "Uploaded to the cloud… Sounds like heaven!", + "subtitle": "Téléchargement de conscience, réflexivité et ré-enchantement de la technologie dans trois fictions sérielles anglophones", + "abstract": "Cet article se penche sur l’appropriation par trois séries télévisées contemporaines des visions utopiques post-humanistes de la vie après la mort dans un univers virtuel grâce au « téléchargement de conscience », une technologie actuellement inexistante, mais faisant l’objet de nombreuses discussions et projections.", + "keywords": [ + "Arts et lettres", + "Cinéma", + "Fiction" + ], + 
"localizedContent": [ + { + "lang": "en", + "abstract": "This article examines how three contemporary television series appropriate the utopian post-humanist visions of life after death in a virtual universe made possible by “mind uploading”, a technology that does not currently exist but is the subject of much discussion and projection.", + "keywords": [ + "Cinema", + "Fiction", + "Narrative" + ] + } + ], + "controlledKeywords": [ + { + "idRameau": "", + "label": "F# (langage de programmation)", + "uriRameau": "http://data.bnf.fr/ark:/12148/cb16191752f" + } + ], + "authors": [ + { + "affiliations": "aff", + "biography": "bio", + "email": "courriel", + "foaf": "foaf", + "forname": "Sylvaine", + "isni": "isni", + "orcid": "0000-0001-6424-3229", + "surname": "Bataille", + "viaf": "viaf", + "wikidata": "wikidata" + } + ], + "issueDirectors": [ + { + "forname": "Tony", + "surname": "Gheeraert" + }, + { + "forname": "Mélanie", + "surname": "Lucciano" + }, + { + "forname": "Sandra", + "surname": "Provini" + } + ], + "reviewers": [ + { + "forname": "McCage", + "surname": "Griffiths" + } + ], + "translators": [ + { + "forname": "Shaun", + "surname": "Nicks" + } + ], + "journalDirectors": [ + { + "forname": "Marcello", + "surname": "Vitali-Rosati" + } + ], + "issue": { + "title": "L’œuvre numérique à son miroir : regards sur les créations digitales contemporaines", + "identifier": "1710", + "number": "2024.8" + }, + "production": { + "issn": "2104-3272", + "producer": "Sens Public Prod", + "prodNum": "Sens Public Web Prod", + "diffNum": "Sens Public Web Publishing" + }, + "funder": { + "organization": "Université de Montréal", + "id": "" + }, + "journal": { + "name": "Sens Public", + "publisher": "Sens Public Publishing", + "email": "redaction@sens-public.org", + "url": "https://www.sens-public.org/" + } +} diff --git a/graphql/helpers/metadata.js b/graphql/helpers/metadata.js index cbd2deb4d..51b90b58a 100644 --- a/graphql/helpers/metadata.js +++ 
b/graphql/helpers/metadata.js @@ -11,14 +11,14 @@ const FORMATTED_FIELD_RE = /_f$/ * This sorting ensures the `nocite` key is always the last one. * @see https://github.com/EcrituresNumeriques/stylo/issues/425 */ -function sortKeys (a, b) { +function sortKeys(a, b) { if (a === 'nocite') return 1 if (b === 'nocite') return -1 return a.localeCompare(b) } -function walkObject (obj, itemTransformFn) { +function walkObject(obj, itemTransformFn) { Object.entries(obj).forEach(([key, value]) => { itemTransformFn(obj, key, value) @@ -37,13 +37,13 @@ function walkObject (obj, itemTransformFn) { * @param {String} yaml * @returns {Object} */ -function toObject (yaml) { +function toObject(yaml) { const [doc = {}] = YAML.loadAll(yaml, 'utf8') return doc } -function reformat (yaml, { id, originalUrl, replaceBibliography = false }) { +function reformat(yaml, { id, originalUrl, replaceBibliography = false }) { if (!yaml || yaml.trim().length === 0) { return '' } @@ -52,8 +52,7 @@ function reformat (yaml, { id, originalUrl, replaceBibliography = false }) { try { doc = toObject(yaml) - } - catch (error) { + } catch (error) { if (error instanceof YAMLException) { logger.warn(`Unable to parse Document YAML: ${yaml}. 
/**
 * Article metadata in the current ("v1") format.
 *
 * @typedef {{
 *   "type": string,
 *   "@version": string,
 *   id: string,
 *   publicationDate: string,
 *   url: string,
 *   lang: string,
 *   title: string,
 *   subtitle: string,
 *   abstract: string,
 *   keywords: string[],
 *   license: string,
 *   acknowledgements: string,
 *   localizedContent: {
 *     lang: string,
 *     title: string,
 *     subtitle: string,
 *     abstract: string,
 *     keywords: string[]
 *   }[],
 *   controlledKeywords: {
 *     label: string,
 *     idRameau: string,
 *     uriRameau: string
 *   }[],
 *   authors: Object[],
 *   reviewers: Object[],
 *   transcribers: Object[],
 *   translators: Object[],
 *   translationOf: {
 *     lang: string,
 *     title: string,
 *     url: string
 *   },
 *   issue: {
 *     title: string,
 *     identifier: string,
 *     number: string
 *   },
 *   issueDirectors: Object[],
 *   production: {
 *     issn: string,
 *     producer: string,
 *     prodNum: string,
 *     diffNum: string
 *   },
 *   funder: {
 *     organization: string,
 *     id: string
 *   },
 *   journal: {
 *     name: string,
 *     publisher: string,
 *     email: string,
 *     url: string
 *   },
 *   journalDirectors: Object[]
 * }} ArticleMetadata
 */

/**
 * Article metadata in the legacy ("v0") format.
 *
 * @typedef {{
 *   id: string,
 *   acknowledgements: string,
 *   date: string,
 *   journal: string,
 *   journal_email: string,
 *   journal_issue: string,
 *   lang: string,
 *   "link-citations": string,
 *   nocite: string,
 *   prod: string,
 *   prodnum: string,
 *   diffnum: string,
 *   publisher: string,
 *   rights: string,
 *   subtitle_f: string,
 *   title_f: string,
 *   url_article: string,
 *   issnnum: string,
 *   funder: {
 *     funder_id: string,
 *     funder_name: string
 *   },
 *   abstract: { lang: string, text_f: string }[],
 *   articleslies: Object[],
 *   authors: Object[],
 *   controlledKeywords: Object[],
 *   director: Object[],
 *   dossier: { id: string, title_f: string }[],
 *   issueDirectors: Object[],
 *   keywords: { lang: string, list_f: string[] }[],
 *   reviewers: Object[],
 *   transcribers: Object[],
 *   translatedTitle: { lang: string, text_f: string }[],
 *   translationOf: { lang: string, title: string, url: string }[],
 *   translations: Object[],
 *   translator: Object[],
 *   typeArticle: string[]
 * }} LegacyArticleMetadata
 */

/**
 * Converts article metadata from the current format to the legacy format.
 *
 * Per-language lists (`abstract`, `keywords`) always start with the entry for
 * the article's main language, followed by one entry per `localizedContent`
 * item. `metadata.journal.url` has no legacy counterpart and is dropped.
 *
 * @param {ArticleMetadata} metadata metadata in the current format
 * @returns {LegacyArticleMetadata} the same metadata in the legacy format
 */
function toLegacyFormat(metadata) {
  // A document without translations yields empty lists, never `undefined`.
  const localizedContent = metadata.localizedContent ?? []

  return {
    id: metadata.id,
    acknowledgements: metadata.acknowledgements,
    date: metadata.publicationDate,
    journal: metadata.journal?.name,
    journal_email: metadata.journal?.email,
    journal_issue: metadata.issue?.number,
    lang: metadata.lang,
    'link-citations': '', // no counterpart in the current format
    nocite: '', // no counterpart in the current format
    prod: metadata.production?.producer,
    prodnum: metadata.production?.prodNum,
    diffnum: metadata.production?.diffNum,
    publisher: metadata.journal?.publisher,
    rights: metadata.license,
    subtitle_f: metadata.subtitle,
    title_f: metadata.title,
    url_article: metadata.url,
    issnnum: metadata.production?.issn,
    funder: {
      funder_id: metadata.funder?.id,
      funder_name: metadata.funder?.organization,
    },
    // Main language first, in the same order as `keywords` below.
    abstract: [
      { lang: metadata.lang, text_f: metadata.abstract },
      ...localizedContent.map((content) => ({
        lang: content.lang,
        text_f: content.abstract,
      })),
    ],
    articleslies: [], // no counterpart in the current format
    authors: metadata.authors,
    controlledKeywords: metadata.controlledKeywords,
    director: metadata.journalDirectors,
    dossier: [
      {
        id: metadata.issue?.identifier,
        title_f: metadata.issue?.title,
      },
    ],
    issueDirectors: metadata.issueDirectors,
    keywords: [
      { lang: metadata.lang, list_f: metadata.keywords },
      ...localizedContent.map((content) => ({
        lang: content.lang,
        list_f: content.keywords,
      })),
    ],
    reviewers: metadata.reviewers,
    transcribers: metadata.transcribers,
    translatedTitle: localizedContent.map((content) => ({
      lang: content.lang,
      text_f: content.title,
    })),
    // Avoid producing `[undefined]` when the article is not a translation.
    translationOf:
      metadata.translationOf == null ? [] : [metadata.translationOf],
    translations: [], // no counterpart in the current format
    translator: metadata.translators,
    typeArticle: [], // no counterpart in the current format
  }
}

/**
 * Converts article metadata from the legacy format to the current format.
 *
 * Entries of `abstract`, `translatedTitle` and `keywords` whose language is
 * the article's main language populate the top-level fields; entries in any
 * other language are grouped by language into `localizedContent`. Missing
 * legacy lists are tolerated and yield `undefined` fields or an empty
 * `localizedContent`.
 *
 * @param {LegacyArticleMetadata} metadata metadata in the legacy format
 * @returns {ArticleMetadata} the same metadata in the current format
 */
function fromLegacyFormat(metadata) {
  const isTranslation = (entry) => entry.lang !== metadata.lang
  const findByLang = (entries, lang) =>
    entries.find((entry) => entry.lang === lang)

  const translatedAbstract = metadata.abstract?.filter(isTranslation) ?? []
  const translatedTitle =
    metadata.translatedTitle?.filter(isTranslation) ?? []
  const translatedKeywords = metadata.keywords?.filter(isTranslation) ?? []

  // Distinct translation languages, in order of first appearance
  // (abstracts, then titles, then keywords).
  const languages = [
    ...new Set(
      [...translatedAbstract, ...translatedTitle, ...translatedKeywords].map(
        (entry) => entry.lang
      )
    ),
  ]
  const localizedContent = languages.map((lang) => ({
    lang,
    title: findByLang(translatedTitle, lang)?.text_f,
    abstract: findByLang(translatedAbstract, lang)?.text_f,
    keywords: findByLang(translatedKeywords, lang)?.list_f,
  }))

  return {
    type: 'article',
    '@version': '1.0',
    id: metadata.id,
    lang: metadata.lang,
    title: metadata.title_f,
    subtitle: metadata.subtitle_f,
    acknowledgements: metadata.acknowledgements,
    // `abstract` and `keywords` may be absent from a legacy document.
    abstract: metadata.abstract?.find((a) => a.lang === metadata.lang)?.text_f,
    keywords: metadata.keywords?.find((k) => k.lang === metadata.lang)?.list_f,
    controlledKeywords: metadata.controlledKeywords,
    publicationDate: metadata.date,
    url: metadata.url_article,
    license: metadata.rights,
    authors: metadata.authors,
    reviewers: metadata.reviewers,
    transcribers: metadata.transcribers,
    translators: metadata.translator,
    // Inverse of toLegacyFormat, which wraps the single object in a list.
    translationOf: metadata.translationOf?.[0],
    issueDirectors: metadata.issueDirectors,
    journalDirectors: metadata.director,
    funder: {
      organization: metadata.funder?.funder_name,
      id: metadata.funder?.funder_id,
    },
    journal: {
      name: metadata.journal,
      publisher: metadata.publisher,
      email: metadata.journal_email,
      url: undefined, // value is not available in legacy format
    },
    issue: {
      title: metadata.dossier?.[0]?.title_f,
      identifier: metadata.dossier?.[0]?.id,
      number: metadata.journal_issue,
    },
    production: {
      issn: metadata.issnnum,
      producer: metadata.prod,
      prodNum: metadata.prodnum,
      diffNum: metadata.diffnum,
    },
    localizedContent,
  }
}
fromLegacyFormat } diff --git a/graphql/helpers/metadata.test.js b/graphql/helpers/metadata.test.js index 854f4c1b7..0ab4d9eab 100644 --- a/graphql/helpers/metadata.test.js +++ b/graphql/helpers/metadata.test.js @@ -1,4 +1,4 @@ -const { reformat } = require('./metadata') +const { reformat, toLegacyFormat, fromLegacyFormat } = require('./metadata') const YAML = require('js-yaml') const fs = require('node:fs/promises') const path = require('path') @@ -143,3 +143,32 @@ test('should be identical', async () => { const expected = '---\n' + YAML.dump(YAML.load(expectedContent, 'utf8'), { sortKeys: true }) + '---' expect(reformat(input, {id: 'abcd1234'})).toBe(expected) }) + +test('should convert to legacy format', async () => { + const expectedContentFilename = path.join(__dirname, '__fixtures__', 'article-uploaded-to-the-cloud-v0.json') + const inputFilename = path.join(__dirname, '__fixtures__', 'article-uploaded-to-the-cloud-v1.json') + + const expectedContent = JSON.parse(await fs.readFile(expectedContentFilename, 'utf8')) + const input =JSON.parse( await fs.readFile(inputFilename, 'utf8')) + const actual = toLegacyFormat(input) + expect(actual).toMatchObject({ + ...expectedContent, + abstract: expect.arrayContaining(expectedContent.abstract) + }) +}) + +test('should convert from legacy format', async () => { + const expectedContentFilename = path.join(__dirname, '__fixtures__', 'article-uploaded-to-the-cloud-v1.json') + const inputFilename = path.join(__dirname, '__fixtures__', 'article-uploaded-to-the-cloud-v0.json') + + const expectedContent = JSON.parse(await fs.readFile(expectedContentFilename, 'utf8')) + const input =JSON.parse( await fs.readFile(inputFilename, 'utf8')) + const actual = fromLegacyFormat(input) + expect(actual).toMatchObject({ + ...expectedContent, + journal: { + ...expectedContent.journal, + url: undefined + } + }) +})