From 11327531aef8310421b3e040795be420eb6f028c Mon Sep 17 00:00:00 2001 From: Guillaume Bourdat Date: Wed, 11 Dec 2024 16:13:02 +0100 Subject: [PATCH] import affiliated structures of developers (#209) add affiliated structures on developers resolve #125 --- .../core/adapters/hal/HalAPI/getStructure.ts | 60 +++++++++++++ api/src/core/adapters/hal/HalAPI/index.ts | 5 ++ api/src/core/adapters/hal/HalAPI/type.ts | 70 +++++++++++++++ .../core/adapters/hal/getHalSoftware.test.ts | 1 + .../hal/getHalSoftwareExternalData.ts | 87 +++++++++++++++---- api/src/core/adapters/hal/getSoftwareForm.ts | 2 +- api/src/core/ports/GetSoftwareExternalData.ts | 7 ++ api/src/types/codemeta.ts | 1 + 8 files changed, 213 insertions(+), 20 deletions(-) create mode 100644 api/src/core/adapters/hal/HalAPI/getStructure.ts diff --git a/api/src/core/adapters/hal/HalAPI/getStructure.ts b/api/src/core/adapters/hal/HalAPI/getStructure.ts new file mode 100644 index 00000000..d1acc5b7 --- /dev/null +++ b/api/src/core/adapters/hal/HalAPI/getStructure.ts @@ -0,0 +1,60 @@ +import { HalFetchError, HalStructure } from "./type"; + +export const getHalStructureByAcronym = async (structureAcronym: string): Promise => { + const url = `http://api.archives-ouvertes.fr/ref/structure/?fl=*&q=acronym_s:"${encodeURIComponent( + structureAcronym + )}"`; + + const res = await fetch(url, { + signal: AbortSignal.timeout(10000) + }).catch(err => { + console.error(err); + throw new HalFetchError(undefined); + }); + + if (res.status === 429) { + await new Promise(resolve => setTimeout(resolve, 100)); + return getHalStructureByAcronym(structureAcronym); + } + + if (res.status === 404) { + throw new HalFetchError(res.status); + } + + const json = await res.json(); + + if (json.error) { + throw new HalFetchError(json.error); + } + + // What do to when multiple for one acronym while in code meta only reference to acronym => LIDILEM, EPFL + return json.response.docs?.[0]; // json.response.numFound === 1 ? : undefined; +}; + +export const getHalStructureById = async (docid: number): Promise => { + const url = `http://api.archives-ouvertes.fr/ref/structure/?fl=*&q=docid:${docid}`; + + const res = await fetch(url, { + signal: AbortSignal.timeout(10000) + }).catch(err => { + console.error(err); + throw new HalFetchError(undefined); + }); + + if (res.status === 429) { + await new Promise(resolve => setTimeout(resolve, 100)); + return getHalStructureById(docid); + } + + if (res.status === 404) { + throw new HalFetchError(res.status); + } + + const json = await res.json(); + + if (json.error) { + throw new HalFetchError(json.error); + } + + return json.response.numFound === 1 ? json.response.docs?.[0] : undefined; +}; diff --git a/api/src/core/adapters/hal/HalAPI/index.ts b/api/src/core/adapters/hal/HalAPI/index.ts index d381736a..dd5c054b 100644 --- a/api/src/core/adapters/hal/HalAPI/index.ts +++ b/api/src/core/adapters/hal/HalAPI/index.ts @@ -1,6 +1,7 @@ import { fetchCodeMetaSoftwareByURL } from "./getCodemetaSoftware"; import { getAllDomains, getDomainByCode } from "./getDomains"; import { fetchHalSoftwareById, fetchHalSoftwares } from "./getHalSoftware"; +import { getHalStructureByAcronym, getHalStructureById } from "./getStructure"; export const halAPIGateway = { software: { @@ -11,5 +12,9 @@ export const halAPIGateway = { domain: { getByCode: getDomainByCode, gelAll: getAllDomains + }, + structure: { + getById: getHalStructureById, + getByAcronym: getHalStructureByAcronym } }; diff --git a/api/src/core/adapters/hal/HalAPI/type.ts b/api/src/core/adapters/hal/HalAPI/type.ts index c38b29f9..24c59e61 100644 --- a/api/src/core/adapters/hal/HalAPI/type.ts +++ b/api/src/core/adapters/hal/HalAPI/type.ts @@ -168,3 +168,73 @@ export type HalAPIDomain = { dateLastIndexed_tdate: string; // ISO date level_i: number; }; + +export type HalStructure = { + acronym_s: string[]; + acronym_sci: string[]; + acronym_t: string[]; + address_s: string[]; + address_t: string[]; + aliasDocid_i: number; + code_s: string; + code_sci: string; + code_t: string; + country_s: string; + dateLastIndexed_tdate: Date; + docid: string; + label_html: string; + label_s: string; + label_sci: string; + label_xml: string; + locked_bool: boolean; + name_s: string; + name_sci: string; + name_t: string; + parentAcronym_s: string; + parentAcronym_t: string; + parentCountry_s: string; + parentDocid_i: string[]; + parentName_s: string[]; + parentType_s: string[]; + parentUpdateDate_s: string[]; + parentUpdateDate_tdate: string[]; + parentUrl_s: string; + parentValid_s: string; + ror_s: string; + rorUrl_s: string; + text: string; + exte_autocomplete: string; + type_s: string; + updateDate_tdate: string; + url_s: string; + valid_s: string; +}; + +export type HalAuthor = { + accountAssociated_bool: boolean; + affPref_i: number; + dateLastIndexed_tdate: string; // ISO Date String + docid: string; + emailDomain_s: string; + emailId_t: string; + firstName_s: string; + firstName_t: string; + form_i: number; + fullNameDocid_fs: string; + fullName_autocomplete: string; + fullName_s: string; + fullName_sci: string; + fullName_t: string; + hasCV_bool: boolean; + idHal_i: number; + idHal_s: string; + label_html: string; + label_s: string; + lastName_s: string; + lastName_t: string; + middleName_s: string; + middleName_t: string; + person_i: number; + text_autocomplete: string; + valid_s: string; +}; diff --git a/api/src/core/adapters/hal/getHalSoftware.test.ts b/api/src/core/adapters/hal/getHalSoftware.test.ts index 83502398..b8649d14 100644 --- a/api/src/core/adapters/hal/getHalSoftware.test.ts +++ b/api/src/core/adapters/hal/getHalSoftware.test.ts @@ -14,6 +14,7 @@ describe("HAL", () => { "description": { "en": "-", "fr": undefined }, "developers": [ { + "affiliatedStructure": [], "id": "0000-0002-9777-5560", "name": "Morane Gruenpeter", "url": "https://orcid.org/0000-0002-9777-5560" diff --git a/api/src/core/adapters/hal/getHalSoftwareExternalData.ts b/api/src/core/adapters/hal/getHalSoftwareExternalData.ts index 6244d3cd..87f46ef5 100644 --- a/api/src/core/adapters/hal/getHalSoftwareExternalData.ts +++ b/api/src/core/adapters/hal/getHalSoftwareExternalData.ts @@ -1,9 +1,31 @@ import memoize from "memoizee"; -import { GetSoftwareExternalData, SoftwareExternalData } from "../../ports/GetSoftwareExternalData"; +import { AuthStructure, GetSoftwareExternalData, SoftwareExternalData } from "../../ports/GetSoftwareExternalData"; import { fetchHalSoftwareById } from "./HalAPI/getHalSoftware"; import { halAPIGateway } from "./HalAPI"; import { HalFetchError } from "./HalAPI/type"; +const buildParentStructureTree = async ( + structureIdArray: number[] | string[] | undefined +): Promise => { + if (!structureIdArray) return []; + + const IdsArray = structureIdArray.map(id => Number(id)); + + return await Promise.all( + IdsArray.map(async (structureId: number) => { + const structure = await halAPIGateway.structure.getById(structureId); + + if (!structure) throw new Error(`Couldn't get data for structure docid : ${structureId}`); + + return { + "name": structure.name_s, + "url": structure?.ror_s ?? structure?.url_s, + "parentStructure": await buildParentStructureTree(structure?.parentDocid_i) + }; + }) + ); +}; + export const getHalSoftwareExternalData: GetSoftwareExternalData = memoize( async (halDocId): Promise => { const halRawSoftware = await fetchHalSoftwareById(halDocId).catch(error => { @@ -23,28 +45,55 @@ export const getHalSoftwareExternalData: GetSoftwareExternalData = memoize( ); const codemetaSoftware = await halAPIGateway.software.getCodemetaByUrl(halRawSoftware.uri_s); - const authors = codemetaSoftware?.author.map(auth => { - const author = auth.author; - const id = author?.["@id"]?.[0]; + if (!codemetaSoftware) { + throw Error(`No codemeta found for doc : ${halDocId}`); + } - let base = { - "name": `${author.givenName} ${author.familyName}`, - "id": id - }; + const authors = await Promise.all( + codemetaSoftware.author.map(async auth => { + const author = auth.author; + const id = author?.["@id"]?.[0]; + const affiliation = author.affiliation; - if (id?.split("-")?.length === 4 && id?.length === 19) { - return { ...base, "url": `https://orcid.org/${id}` }; - } + const base = { + "name": `${author.givenName} ${author.familyName}`, + "id": id, + "affiliatedStructure": [] as AuthStructure[] + }; - if (id) { - return { ...base, "url": `https://hal.science/search/index/q/*/authIdHal_s/${id}` }; - } + if (affiliation?.length > 0) { + const structures = await Promise.all( + affiliation + .filter(affilatiedStructure => affilatiedStructure.name) + .map(async affilatiedStructure => { + const structure = await halAPIGateway.structure.getByAcronym(affilatiedStructure?.name); + if (!structure) { + throw new Error(`Structure not found : name = ${affilatiedStructure?.name}`); + } + return { + "name": structure.name_s, + "url": structure.ror_s ?? structure?.url_s, + "parentStructure": await buildParentStructureTree(structure.parentDocid_i) + }; + }) + ); + base.affiliatedStructure = structures; + } - return { - ...base, - "url": `https://hal.science/search/index/q/*/authFullName_s/${author.givenName}+${author.familyName}` - }; - }); + if (id?.split("-")?.length === 4 && id?.length === 19) { + return { ...base, "url": `https://orcid.org/${id}` }; + } + + if (id) { + return { ...base, "url": `https://hal.science/search/index/q/*/authIdHal_s/${id}` }; + } + + return { + ...base, + "url": `https://hal.science/search/index/q/*/authFullName_s/${author.givenName}+${author.familyName}` + }; + }) + ); return { externalId: halRawSoftware.docid, diff --git a/api/src/core/adapters/hal/getSoftwareForm.ts b/api/src/core/adapters/hal/getSoftwareForm.ts index b483490b..adeeaaf0 100644 --- a/api/src/core/adapters/hal/getSoftwareForm.ts +++ b/api/src/core/adapters/hal/getSoftwareForm.ts @@ -40,7 +40,7 @@ export const halRawSoftwareToSoftwareForm = async (halSoftware: HalRawSoftware): externalId: halSoftware.docid, comptoirDuLibreId: undefined, softwareLicense: codemetaSoftware?.license?.[0] ?? "undefined", // TODO 1 case to copyright - softwareMinimalVersion: undefined, // Merge from #198 + softwareMinimalVersion: undefined, similarSoftwareExternalDataIds: [], softwareLogoUrl: undefined, softwareKeywords: halSoftware.keyword_s || [], diff --git a/api/src/core/ports/GetSoftwareExternalData.ts b/api/src/core/ports/GetSoftwareExternalData.ts index dedb837d..efef2f9e 100644 --- a/api/src/core/ports/GetSoftwareExternalData.ts +++ b/api/src/core/ports/GetSoftwareExternalData.ts @@ -12,6 +12,12 @@ export type GetSoftwareExternalData = { clear: (externalId: ExternalId) => void; }; +export type AuthStructure = { + name: string; + url: string | undefined; + parentStructure: AuthStructure[] | null; +}; + export type SoftwareExternalData = { externalId: ExternalId; externalDataOrigin: ExternalDataOrigin; @@ -19,6 +25,7 @@ export type SoftwareExternalData = { name: string; id: string | undefined; url: string; + affiliatedStructure?: AuthStructure[] | null; }[]; label: LocalizedString; description: LocalizedString; diff --git a/api/src/types/codemeta.ts b/api/src/types/codemeta.ts index 5eaa04ed..944fc4d2 100644 --- a/api/src/types/codemeta.ts +++ b/api/src/types/codemeta.ts @@ -32,6 +32,7 @@ export interface Person extends Auth { export interface Organization extends Auth { "@type": "Organization"; + name: string; } export type CodeMetaIdentifier = {