Skip to content

Commit

Permalink
Merge branch 'main' into #215-import-from-wikidata
Browse files Browse the repository at this point in the history
  • Loading branch information
guillermau authored Dec 11, 2024
2 parents db3c7d0 + 1132753 commit 1a959f6
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 20 deletions.
60 changes: 60 additions & 0 deletions api/src/core/adapters/hal/HalAPI/getStructure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { HalFetchError, HalStructure } from "./type";

/**
 * Looks up a structure in the HAL structure reference API by its acronym.
 *
 * The query matches the `acronym_s` field exactly (the acronym is sent as a quoted phrase).
 *
 * @param structureAcronym Acronym to search for.
 * @returns The first matching document, or `undefined` when the API returns no document.
 * @throws HalFetchError when the request fails or times out, when the API answers with a
 * non-success status (including being rate limited on every attempt), or when the response
 * body carries an `error` field.
 */
export const getHalStructureByAcronym = async (structureAcronym: string): Promise<HalStructure | undefined> => {
    // Upper bound on attempts so a persistent 429 cannot retry forever.
    const maxAttempts = 5;
    const retryDelayMs = 100;

    const url = `http://api.archives-ouvertes.fr/ref/structure/?fl=*&q=acronym_s:"${encodeURIComponent(
        structureAcronym
    )}"`;

    for (let attempt = 1; ; attempt++) {
        const res = await fetch(url, {
            signal: AbortSignal.timeout(10000)
        }).catch(err => {
            console.error(err);
            throw new HalFetchError(undefined);
        });

        if (res.status === 429) {
            if (attempt >= maxAttempts) {
                throw new HalFetchError(res.status);
            }
            // Rate limited: wait briefly, then issue the same request again.
            await new Promise(resolve => setTimeout(resolve, retryDelayMs));
            continue;
        }

        if (res.status === 404) {
            throw new HalFetchError(res.status);
        }

        if (!res.ok) {
            // Error responses may or may not carry a JSON body; prefer the API's own error
            // payload and fall back to the HTTP status when the body cannot be parsed.
            const errorBody = await res.json().catch(() => undefined);
            throw new HalFetchError(errorBody?.error ?? res.status);
        }

        const json = await res.json();

        if (json.error) {
            throw new HalFetchError(json.error);
        }

        // Open question: several structures can share one acronym (e.g. LIDILEM, EPFL) while
        // codemeta only references the acronym. For now the first match is returned.
        return json.response?.docs?.[0];
    }
};

/**
 * Looks up a structure in the HAL structure reference API by its `docid`.
 *
 * @param docid Identifier of the structure document.
 * @returns The matching document when the API reports exactly one hit, otherwise `undefined`.
 * @throws HalFetchError when the request fails or times out, when the API answers with a
 * non-success status (including being rate limited on every attempt), or when the response
 * body carries an `error` field.
 */
export const getHalStructureById = async (docid: number): Promise<HalStructure | undefined> => {
    // Upper bound on attempts so a persistent 429 cannot retry forever.
    const maxAttempts = 5;
    const retryDelayMs = 100;

    const url = `http://api.archives-ouvertes.fr/ref/structure/?fl=*&q=docid:${docid}`;

    for (let attempt = 1; ; attempt++) {
        const res = await fetch(url, {
            signal: AbortSignal.timeout(10000)
        }).catch(err => {
            console.error(err);
            throw new HalFetchError(undefined);
        });

        if (res.status === 429) {
            if (attempt >= maxAttempts) {
                throw new HalFetchError(res.status);
            }
            // Rate limited: wait briefly, then issue the same request again.
            await new Promise(resolve => setTimeout(resolve, retryDelayMs));
            continue;
        }

        if (res.status === 404) {
            throw new HalFetchError(res.status);
        }

        if (!res.ok) {
            // Error responses may or may not carry a JSON body; prefer the API's own error
            // payload and fall back to the HTTP status when the body cannot be parsed.
            const errorBody = await res.json().catch(() => undefined);
            throw new HalFetchError(errorBody?.error ?? res.status);
        }

        const json = await res.json();

        if (json.error) {
            throw new HalFetchError(json.error);
        }

        // Anything other than exactly one hit is treated as "not found".
        return json.response?.numFound === 1 ? json.response.docs?.[0] : undefined;
    }
};
5 changes: 5 additions & 0 deletions api/src/core/adapters/hal/HalAPI/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { fetchCodeMetaSoftwareByURL } from "./getCodemetaSoftware";
import { getAllDomains, getDomainByCode } from "./getDomains";
import { fetchHalSoftwareById, fetchHalSoftwares } from "./getHalSoftware";
import { getHalStructureByAcronym, getHalStructureById } from "./getStructure";

export const halAPIGateway = {
software: {
Expand All @@ -11,5 +12,9 @@ export const halAPIGateway = {
domain: {
getByCode: getDomainByCode,
gelAll: getAllDomains
},
structure: {
getById: getHalStructureById,
getByAcronym: getHalStructureByAcronym
}
};
70 changes: 70 additions & 0 deletions api/src/core/adapters/hal/HalAPI/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,73 @@ export type HalAPIDomain = {
dateLastIndexed_tdate: string; // ISO date
level_i: number;
};

/**
 * Shape of a structure document as typed for the HAL structure lookups.
 *
 * Values of this type are taken directly from a parsed JSON response, so every field holds a
 * JSON-representable value (dates are strings, never `Date` instances).
 */
export type HalStructure = {
    acronym_s: string[];
    acronym_sci: string[];
    acronym_t: string[];
    address_s: string[];
    address_t: string[];
    aliasDocid_i: number;
    code_s: string;
    code_sci: string;
    code_t: string;
    country_s: string;
    dateLastIndexed_tdate: string; // ISO date string (JSON cannot carry a Date object)
    docid: string;
    label_html: string;
    label_s: string;
    label_sci: string;
    label_xml: string;
    locked_bool: boolean;
    name_s: string;
    name_sci: string;
    name_t: string;
    parentAcronym_s: string;
    parentAcronym_t: string;
    parentCountry_s: string;
    parentDocid_i: string[];
    parentName_s: string[];
    parentType_s: string[];
    parentUpdateDate_s: string[];
    parentUpdateDate_tdate: string[];
    parentUrl_s: string;
    parentValid_s: string;
    ror_s: string;
    rorUrl_s: string;
    text: string;
    // NOTE(review): possibly a typo for `text_autocomplete` (the name used in HalAuthor) — confirm against the API.
    exte_autocomplete: string;
    type_s: string;
    updateDate_tdate: string;
    url_s: string;
    valid_s: string;
};

/**
 * Shape of an author document as typed for the HAL API.
 *
 * Fields are grouped by topic below; every field is required.
 */
export type HalAuthor = {
    // Identifiers
    docid: string;
    idHal_i: number;
    idHal_s: string;
    person_i: number;
    form_i: number;
    affPref_i: number;

    // Name parts
    firstName_s: string;
    firstName_t: string;
    middleName_s: string;
    middleName_t: string;
    lastName_s: string;
    lastName_t: string;

    // Full name variants
    fullName_s: string;
    fullName_sci: string;
    fullName_t: string;
    fullName_autocomplete: string;
    fullNameDocid_fs: string;

    // Labels and free text
    label_html: string;
    label_s: string;
    text_autocomplete: string;

    // E-mail
    emailDomain_s: string;
    emailId_t: string;

    // Flags and status
    accountAssociated_bool: boolean;
    hasCV_bool: boolean;
    valid_s: string;

    // Indexing metadata
    dateLastIndexed_tdate: string; // ISO Date String
};
1 change: 1 addition & 0 deletions api/src/core/adapters/hal/getHalSoftware.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ describe("HAL", () => {
"description": { "en": "-", "fr": undefined },
"developers": [
{
"affiliatedStructure": [],
"id": "0000-0002-9777-5560",
"name": "Morane Gruenpeter",
"url": "https://orcid.org/0000-0002-9777-5560"
Expand Down
87 changes: 68 additions & 19 deletions api/src/core/adapters/hal/getHalSoftwareExternalData.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
import memoize from "memoizee";
import { GetSoftwareExternalData, SoftwareExternalData } from "../../ports/GetSoftwareExternalData";
import { AuthStructure, GetSoftwareExternalData, SoftwareExternalData } from "../../ports/GetSoftwareExternalData";
import { fetchHalSoftwareById } from "./HalAPI/getHalSoftware";
import { halAPIGateway } from "./HalAPI";
import { HalFetchError } from "./HalAPI/type";

/**
 * Resolves a list of HAL structure ids into `AuthStructure` nodes, recursively resolving each
 * structure's own parents (`parentDocid_i`) into `parentStructure`.
 *
 * All ids of one level are looked up concurrently.
 *
 * @param structureIdArray Structure ids, as numbers or numeric strings; `undefined` yields `[]`.
 * @returns One node per id, in the same order as the input.
 * @throws Error when a structure cannot be retrieved for one of the ids.
 */
const buildParentStructureTree = async (
    structureIdArray: number[] | string[] | undefined
): Promise<AuthStructure[]> => {
    if (!structureIdArray) {
        return [];
    }

    // Builds a single tree node: fetch the structure, then resolve its ancestors.
    const resolveNode = async (rawId: number | string): Promise<AuthStructure> => {
        const docid = Number(rawId);
        const halStructure = await halAPIGateway.structure.getById(docid);

        if (!halStructure) {
            throw new Error(`Couldn't get data for structure docid : ${docid}`);
        }

        const ancestors = await buildParentStructureTree(halStructure.parentDocid_i);

        return {
            "name": halStructure.name_s,
            // Prefer the ROR value and fall back to the structure's own URL.
            "url": halStructure.ror_s ?? halStructure.url_s,
            "parentStructure": ancestors
        };
    };

    const rawIds: (number | string)[] = structureIdArray;

    return Promise.all(rawIds.map(rawId => resolveNode(rawId)));
};

export const getHalSoftwareExternalData: GetSoftwareExternalData = memoize(
async (halDocId): Promise<SoftwareExternalData | undefined> => {
const halRawSoftware = await fetchHalSoftwareById(halDocId).catch(error => {
Expand All @@ -23,28 +45,55 @@ export const getHalSoftwareExternalData: GetSoftwareExternalData = memoize(
);

const codemetaSoftware = await halAPIGateway.software.getCodemetaByUrl(halRawSoftware.uri_s);
const authors = codemetaSoftware?.author.map(auth => {
const author = auth.author;
const id = author?.["@id"]?.[0];
if (!codemetaSoftware) {
throw Error(`No codemeta found for doc : ${halDocId}`);
}

let base = {
"name": `${author.givenName} ${author.familyName}`,
"id": id
};
const authors = await Promise.all(
codemetaSoftware.author.map(async auth => {
const author = auth.author;
const id = author?.["@id"]?.[0];
const affiliation = author.affiliation;

if (id?.split("-")?.length === 4 && id?.length === 19) {
return { ...base, "url": `https://orcid.org/${id}` };
}
const base = {
"name": `${author.givenName} ${author.familyName}`,
"id": id,
"affiliatedStructure": [] as AuthStructure[]
};

if (id) {
return { ...base, "url": `https://hal.science/search/index/q/*/authIdHal_s/${id}` };
}
if (affiliation?.length > 0) {
const structures = await Promise.all(
affiliation
.filter(affilatiedStructure => affilatiedStructure.name)
.map(async affilatiedStructure => {
const structure = await halAPIGateway.structure.getByAcronym(affilatiedStructure?.name);
if (!structure) {
throw new Error(`Structure not found : name = ${affilatiedStructure?.name}`);
}
return {
"name": structure.name_s,
"url": structure.ror_s ?? structure?.url_s,
"parentStructure": await buildParentStructureTree(structure.parentDocid_i)
};
})
);
base.affiliatedStructure = structures;
}

return {
...base,
"url": `https://hal.science/search/index/q/*/authFullName_s/${author.givenName}+${author.familyName}`
};
});
if (id?.split("-")?.length === 4 && id?.length === 19) {
return { ...base, "url": `https://orcid.org/${id}` };
}

if (id) {
return { ...base, "url": `https://hal.science/search/index/q/*/authIdHal_s/${id}` };
}

return {
...base,
"url": `https://hal.science/search/index/q/*/authFullName_s/${author.givenName}+${author.familyName}`
};
})
);

return {
externalId: halRawSoftware.docid,
Expand Down
2 changes: 1 addition & 1 deletion api/src/core/adapters/hal/getSoftwareForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export const halRawSoftwareToSoftwareForm = async (halSoftware: HalRawSoftware):
externalId: halSoftware.docid,
comptoirDuLibreId: undefined,
softwareLicense: codemetaSoftware?.license?.[0] ?? "undefined", // TODO 1 case to copyright
softwareMinimalVersion: undefined, // Merge from #198
softwareMinimalVersion: undefined,
similarSoftwareExternalDataIds: [],
softwareLogoUrl: undefined,
softwareKeywords: halSoftware.keyword_s || [],
Expand Down
7 changes: 7 additions & 0 deletions api/src/core/ports/GetSoftwareExternalData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,20 @@ export type GetSoftwareExternalData = {
clear: (externalId: ExternalId) => void;
};

/**
 * A structure an author is affiliated with, together with its chain of parent structures.
 * The type is recursive: each parent is itself an `AuthStructure`.
 */
export type AuthStructure = {
    /** Name of the structure. */
    name: string;
    /** Link for the structure; explicitly `undefined` when none is known. */
    url: undefined | string;
    /** Parent structures of this one, or `null`. */
    parentStructure: null | Array<AuthStructure>;
};

export type SoftwareExternalData = {
externalId: ExternalId;
externalDataOrigin: ExternalDataOrigin;
developers: {
name: string;
id: string | undefined;
url: string;
affiliatedStructure?: AuthStructure[] | null;
}[];
label: LocalizedString;
description: LocalizedString;
Expand Down
1 change: 1 addition & 0 deletions api/src/types/codemeta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export interface Person extends Auth {

/**
 * Codemeta entry whose "@type" is the literal "Organization".
 * Extends the shared `Auth` shape and additionally requires a name.
 */
export interface Organization extends Auth {
// Literal tag that identifies this entry as an organization.
"@type": "Organization";
// Name of the organization; required on every Organization.
name: string;
}

export type CodeMetaIdentifier = {
Expand Down

0 comments on commit 1a959f6

Please sign in to comment.