Skip to content

Commit

Permalink
#199 make science categories readable (#200)
Browse files Browse the repository at this point in the history
Refactor HAL adapter
Make call on API to resolve domain as software categories
closes #199
  • Loading branch information
guillermau authored Dec 4, 2024
1 parent c47e815 commit 2b1a533
Show file tree
Hide file tree
Showing 12 changed files with 315 additions and 235 deletions.
48 changes: 48 additions & 0 deletions api/src/core/adapters/hal/HalAPI/getDomains.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import fetch from "node-fetch";
import { HalAPIDomain, HalFetchError } from "./type";

/**
 * Fetches the scientific domains exposed by the HAL reference API (`/ref/domain/`).
 *
 * NOTE(review): no `rows` parameter is sent, so the number of documents returned
 * depends on the API's default page size — confirm this really yields *all* domains.
 *
 * @returns The `response.docs` array of the JSON payload returned by HAL.
 * @throws HalFetchError with `status === undefined` when the request itself fails,
 * or with the HTTP status when HAL answers with a non-2xx status other than 429.
 */
export async function getAllDomains(): Promise<HalAPIDomain[]> {
    const url = "https://api.archives-ouvertes.fr/ref/domain/?fl=*";

    const res = await fetch(url).catch(err => {
        console.error(err);
        throw new HalFetchError(undefined);
    });

    if (res.status === 429) {
        // Rate limited: wait 100 ms, then try again.
        // NOTE(review): the number of retries is unbounded.
        await new Promise(resolve => setTimeout(resolve, 100));
        return getAllDomains();
    }

    // Any other non-2xx answer (404, 500, 503, ...) is reported as a HalFetchError
    // instead of falling through to JSON parsing of an error payload.
    if (!res.ok) {
        throw new HalFetchError(res.status);
    }

    const json = await res.json();

    return json.response.docs;
}

/**
 * Fetches a single HAL domain by its code, querying `/ref/domain/` with `q=code_s:<code>`.
 *
 * @param code Domain code to look up. It is URL-encoded before being placed in the
 * query string. NOTE(review): characters that are special in the query syntax itself
 * (spaces, colons, ...) are not escaped beyond URL encoding.
 * @returns The first document of `response.docs`.
 * @throws HalFetchError with `status === undefined` when the request itself fails,
 * with the HTTP status when HAL answers with a non-2xx status other than 429, and
 * with status 404 when the answer contains no matching domain.
 */
export async function getDomainByCode(code: string): Promise<HalAPIDomain> {
    const url = `https://api.archives-ouvertes.fr/ref/domain/?q=code_s:${encodeURIComponent(code)}&fl=*`;

    const res = await fetch(url).catch(err => {
        console.error(err);
        throw new HalFetchError(undefined);
    });

    if (res.status === 429) {
        // Rate limited: wait 100 ms, then try again.
        // NOTE(review): the number of retries is unbounded.
        await new Promise(resolve => setTimeout(resolve, 100));
        return getDomainByCode(code);
    }

    // Any other non-2xx answer is reported as a HalFetchError instead of being parsed.
    if (!res.ok) {
        throw new HalFetchError(res.status);
    }

    const json = await res.json();
    const domain: HalAPIDomain | undefined = json?.response?.docs?.[0];

    // An empty result set would otherwise leak `undefined` through a return type
    // that promises a HalAPIDomain; report it as "not found" instead.
    if (domain === undefined) {
        throw new HalFetchError(404);
    }

    return domain;
}
Original file line number Diff line number Diff line change
@@ -1,40 +1,9 @@
import memoize from "memoizee";
import fetch from "node-fetch";
import { SoftwareExternalData, GetSoftwareExternalData } from "../../ports/GetSoftwareExternalData";
import {
HalRawSoftware,
halSoftwareFieldsToReturnAsString,
rawHalSoftwareToSoftwareExternalData
} from "./halRawSoftware";
import { halSoftwareFieldsToReturnAsString } from "../halRawSoftware";
import { HalFetchError, HalRawSoftware } from "./type";

// HAL documentation is here : https://api.archives-ouvertes.fr/docs/search

/**
 * Memoized lookup of a HAL document converted to `SoftwareExternalData`.
 *
 * Resolves to `undefined` when the fetch fails with a `HalFetchError` whose status is
 * 404 or `undefined`, when no document is returned, or when the document's
 * `docType_s` is not "SOFTWARE". Any other error is rethrown.
 */
export const getHalSoftware: GetSoftwareExternalData = memoize(
    async (halDocId): Promise<SoftwareExternalData | undefined> => {
        let rawSoftware: HalRawSoftware | undefined;

        try {
            rawSoftware = await fetchHalSoftwareById(halDocId);
        } catch (error) {
            const isIgnorable =
                error instanceof HalFetchError && (error.status === 404 || error.status === undefined);
            if (!isIgnorable) throw error;
            return undefined;
        }

        if (rawSoftware === undefined || rawSoftware.docType_s !== "SOFTWARE") {
            return undefined;
        }

        return rawHalSoftwareToSoftwareExternalData(rawSoftware);
    },
    { "promise": true, "maxAge": 3 * 3600 * 1000 }
);

/**
 * Error raised when a call to the HAL API fails.
 *
 * `status` carries the HTTP status of the failed response, or `undefined` when the
 * request produced no response at all.
 */
export class HalFetchError extends Error {
    constructor(public readonly status: number | undefined) {
        super(`Hal fetch error status: ${status}`);
        // Restore the prototype chain so `instanceof HalFetchError` keeps working
        // even when the class is transpiled to a target where subclassing Error loses it.
        Object.setPrototypeOf(this, new.target.prototype);
        // Identify the error by its own name rather than the inherited "Error".
        this.name = "HalFetchError";
    }
}

export async function fetchHalSoftwareById(halDocid: string): Promise<HalRawSoftware | undefined> {
const res = await fetch(
`https://api.archives-ouvertes.fr/search/?q=docid:${halDocid}&wt=json&fl=${halSoftwareFieldsToReturnAsString}&sort=docid%20asc`
Expand Down
13 changes: 13 additions & 0 deletions api/src/core/adapters/hal/HalAPI/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { getAllDomains, getDomainByCode } from "./getDomains";
import { fetchHalSoftwareById, fetchHalSoftwares } from "./getHalSoftware";

/**
 * Single entry point grouping the HAL API calls by resource (`software`, `domain`).
 */
export const halAPIGateway = {
    software: {
        getById: fetchHalSoftwareById,
        getAll: fetchHalSoftwares
    },
    domain: {
        getByCode: getDomainByCode,
        getAll: getAllDomains,
        /** @deprecated Misspelled alias of `getAll`, kept so existing callers keep working. */
        gelAll: getAllDomains
    }
};
170 changes: 170 additions & 0 deletions api/src/core/adapters/hal/HalAPI/type.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/**
 * Error raised when a call to the HAL API fails.
 *
 * `status` carries the HTTP status of the failed response, or `undefined` when the
 * request produced no response at all.
 */
export class HalFetchError extends Error {
    constructor(public readonly status: number | undefined) {
        super(`Hal fetch error status: ${status}`);
        // Restore the prototype chain so `instanceof HalFetchError` keeps working
        // even when the class is transpiled to a target where subclassing Error loses it.
        Object.setPrototypeOf(this, new.target.prototype);
        // Identify the error by its own name rather than the inherited "Error".
        this.name = "HalFetchError";
    }
}

/**
 * Shape of a raw software document as typed by this adapter for HAL API responses.
 *
 * Uncommented fields are the ones the adapter reads; commented-out fields are kept
 * as a reference of what the API can additionally return.
 *
 * NOTE(review): several fields declared as required here (e.g. `keyword_s`,
 * `softVersion_s`, `softCodeRepository_s`, `authIdHal_s`) are read with optional
 * chaining by the consuming code, which suggests they can be absent from real
 * responses — confirm before relying on their presence.
 */
export type HalRawSoftware = {
// the following fields are the ones that we use
docid: string;
title_s: string[];
en_title_s?: string[];
fr_title_s?: string[];
abstract_s?: string[];
en_abstract_s?: string[];
fr_abstract_s?: string[];
uri_s: string;
openAccess_bool: boolean;
// Document type; the adapter only keeps documents whose value is "SOFTWARE".
docType_s: string;
// BibTeX entry of the document; parsed by the adapter to extract the license.
label_bibtex: string;

// The following is the complete list of fields that could be returned by the HAL API

// label_s: string;
// citationRef_s: string;
// citationFull_s: string;
// label_endnote: string;
// label_coins: string;
// Domain codes of the document; each code can be resolved through the domain referential.
domainAllCode_s: string[];
// level0_domain_s: string[];
// domain_s: string[];
// level1_domain_s: string[];
// fr_domainAllCodeLabel_fs?: string[];
// en_domainAllCodeLabel_fs?: string[];
// es_domainAllCodeLabel_fs: string[];
// eu_domainAllCodeLabel_fs: string[];
// primaryDomain_s: string;
// en_keyword_s?: string[];
keyword_s: string[];
// fr_keyword_s?: string[];
// authIdFormPerson_s: string[];
// Indexed in parallel with authFullName_s by the adapter (same position = same author).
authIdForm_i: number[];
// authLastName_s: string[];
// authFirstName_s: string[];
// authMiddleName_s: string[];
authFullName_s: string[];
// authLastNameFirstName_s: string[];
// authIdLastNameFirstName_fs: string[];
// authFullNameIdFormPerson_fs: string[];
// authAlphaLastNameFirstNameId_fs: string[];
// authIdFullName_fs: string[];
// authFullNameId_fs: string[];
// authQuality_s: string[];
// authFullNameFormIDPersonIDIDHal_fs: string[];
// authFullNamePersonIDIDHal_fs: string[];
// authIdHalFullName_fs: string[];
// authFullNameIdHal_fs: string[];
// authAlphaLastNameFirstNameIdHal_fs: string[];
// authLastNameFirstNameIdHalPersonid_fs: string[];
// authIdHasPrimaryStructure_fs: string[];
authIdHal_s: string[];
// structPrimaryHasAuthId_fs: string[];
// structPrimaryHasAuthIdHal_fs: string[];
// structPrimaryHasAlphaAuthId_fs: string[];
// structPrimaryHasAlphaAuthIdHal_fs: string[];
// structPrimaryHasAlphaAuthIdHalPersonid_fs: string[];
// authIdHasStructure_fs: string[];
// structHasAuthId_fs: string[];
// structHasAuthIdHal_fs: string[];
// structHasAuthIdHalPersonid_s: string[];
// structHasAlphaAuthId_fs: string[];
// structHasAlphaAuthIdHal_fs: string[];
// structHasAlphaAuthIdHalPersonid_fs: string[];
// instStructId_i: number[];
// instStructIdName_fs: string[];
// instStructNameId_fs: string[];
// instStructName_fs: string[];
// instStructName_s: string[];
// instStructAddress_s: string;
// instStructCountry_s: string;
// instStructType_s: string;
// instStructValid_s: string;
// structId_i: number[];
// structIdName_fs: string[];
// structNameId_fs: string[];
// structName_fs: string[];
// structName_s: string;
// structAddress_s: string;
// structCountry_s: string;
// structType_s: string;
// structValid_s: string;
// contributorId_i: number;
// contributorFullName_s: string;
// contributorIdFullName_fs: string;
// contributorFullNameId_fs: string;
// language_s: string[];
// halId_s: string;
// version_i: number;
// status_i: number;
// instance_s: string;
// sid_i: number;
// submitType_s: string;
// docSubType_s: string;
// oldDocType_s: string;
// thumbId_i: number;
// selfArchiving_bool: boolean;
// authorityInstitution_s: string[];
// reportType_s: string;
// inPress_bool: boolean;
// Date string; the adapter converts it with `new Date(...)` to get the publication time.
modifiedDate_tdate: string;
// modifiedDate_s: string;
// modifiedDateY_i: number;
// modifiedDateM_i: number;
// modifiedDateD_i: number;
// submittedDate_tdate: string;
// submittedDate_s: string;
// submittedDateY_i: number;
// submittedDateM_i: number;
// submittedDateD_i: number;
// releasedDate_tdate: string;
// releasedDate_s: string;
// releasedDateY_i: number;
// releasedDateM_i: number;
// releasedDateD_i: number;
// producedDate_tdate: string;
// producedDate_s: string;
// producedDateY_i: number;
// producedDateM_i: number;
// producedDateD_i: number;
// publicationDate_tdate: string;
// publicationDate_s: string;
// publicationDateY_i: number;
// publicationDateM_i: number;
// publicationDateD_i: number;
// owners_i: number[];
// collId_i: number[];
// collName_s: string[];
// collCode_s: string[];
// collCategory_s: string[];
// collIdName_fs: string[];
// collNameId_fs: string[];
// collCodeName_fs: string[];
// collCategoryCodeName_fs: string[];
// collNameCode_fs: string[];
// fileMain_s: string;
// files_s: string[];
// fileType_s: string[];
// _version_: bigint;
// dateLastIndexed_tdate: string;
// label_xml: string;
softCodeRepository_s: string[];
// softDevelopmentStatus_s: string[];
softPlatform_s: string[];
softProgrammingLanguage_s: string[];
// softRuntimePlatform_s: string[];
softVersion_s: string[];
licence_s: string[];
};

/**
 * One document of the HAL domain referential, i.e. an element of `response.docs`
 * in the JSON returned by the `/ref/domain/` endpoint.
 */
export type HalAPIDomain = {
docid: number;
haveNext_bool: boolean;
// Domain code; this is the field the by-code lookup filters on (`q=code_s:<code>`).
code_s: string;
// Human-readable label used as software category, e.g. "Computer Science [cs]".
en_domain_s: string;
// Presumably the French counterpart of `en_domain_s` — TODO confirm.
fr_domain_s: string;
label_s: string;
_version_: number;
dateLastIndexed_tdate: string; // ISO date
level_i: number;
};
4 changes: 2 additions & 2 deletions api/src/core/adapters/hal/getHalSoftware.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, it } from "vitest";
import { expectToEqual } from "../../../tools/test.helpers";
import { getHalSoftware } from "./getHalSoftware";
import { getHalSoftware } from "./getSoftwareFromHal";
import { getHalSoftwareOptions } from "./getHalSoftwareOptions";

describe("HAL", () => {
Expand Down Expand Up @@ -34,7 +34,7 @@ describe("HAL", () => {
"softwareVersion": undefined,
"keywords": undefined,
"programmingLanguages": undefined,
"applicationCategories": ["info"],
"applicationCategories": ["Computer Science [cs]"],
"publicationTime": new Date(1561566581000)
});
});
Expand Down
3 changes: 2 additions & 1 deletion api/src/core/adapters/hal/getHalSoftwareOptions.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import fetch from "node-fetch";
import type { GetSoftwareExternalDataOptions } from "../../ports/GetSoftwareExternalDataOptions";
import { HalRawSoftware, halSoftwareFieldsToReturnAsString, rawHalSoftwareToExternalOption } from "./halRawSoftware";
import { halSoftwareFieldsToReturnAsString, rawHalSoftwareToExternalOption } from "./halRawSoftware";
import { HalRawSoftware } from "./HalAPI/type";

// HAL documentation is here : https://api.archives-ouvertes.fr/docs/search

Expand Down
67 changes: 67 additions & 0 deletions api/src/core/adapters/hal/getSoftwareFromHal.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import memoize from "memoizee";
import { GetSoftwareExternalData, SoftwareExternalData } from "../../ports/GetSoftwareExternalData";
import { fetchHalSoftwareById } from "./HalAPI/getHalSoftware";
import { parseBibliographicFields } from "./parseBibliographicFields";
import { halAPIGateway } from "./HalAPI";
import { HalFetchError } from "./HalAPI/type";

/**
 * Resolves a HAL document id into `SoftwareExternalData`.
 *
 * Resolves to `undefined` when the software fetch fails with a `HalFetchError` whose
 * status is 404 or `undefined`, when no document is returned, or when the document's
 * `docType_s` is not "SOFTWARE". Any other error is rethrown.
 *
 * The document's domain codes are resolved to readable labels (`en_domain_s`) through
 * the HAL domain referential. A code that cannot be resolved is kept as-is, so one
 * unknown code does not fail the whole lookup.
 *
 * Results are memoized (`promise: true`, `maxAge: 3 * 3600 * 1000`).
 */
export const getHalSoftware: GetSoftwareExternalData = memoize(
    async (halDocId): Promise<SoftwareExternalData | undefined> => {
        const halRawSoftware = await fetchHalSoftwareById(halDocId).catch(error => {
            if (!(error instanceof HalFetchError)) throw error;
            if (error.status === 404 || error.status === undefined) return;
            throw error;
        });

        if (halRawSoftware === undefined) return;
        if (halRawSoftware.docType_s !== "SOFTWARE") return;

        const bibliographicReferences = parseBibliographicFields(halRawSoftware.label_bibtex);
        const license = bibliographicReferences?.license?.join(", ");

        // `domainAllCode_s` is guarded like the other fields read with `?.` below:
        // a document without domains yields an empty category list instead of a TypeError.
        const sciencesCategories = await Promise.all(
            (halRawSoftware.domainAllCode_s ?? []).map(async (code: string): Promise<string> => {
                const domain = await halAPIGateway.domain.getByCode(code).catch(error => {
                    // An unknown domain is not fatal; anything else is rethrown.
                    if (error instanceof HalFetchError && error.status === 404) return undefined;
                    throw error;
                });
                // Fall back to the raw code when the referential has no label for it.
                return domain?.en_domain_s ?? code;
            })
        );

        return {
            externalId: halRawSoftware.docid,
            externalDataOrigin: "HAL",
            developers: halRawSoftware.authFullName_s.map((fullname: string, index: number) => {
                return {
                    // Prefer the author's HAL id; otherwise use the form id at the same position.
                    "id": halRawSoftware?.authIdHal_s?.[index] ?? halRawSoftware.authIdForm_i[index].toString(),
                    "name": fullname
                };
            }),
            label: {
                "en": halRawSoftware?.en_title_s?.[0] ?? halRawSoftware?.title_s?.[0] ?? "-",
                "fr": halRawSoftware?.fr_title_s?.[0] ?? halRawSoftware.en_title_s?.[0] // TODO why fall back to English rather than the default title?
            },
            description: {
                "en": halRawSoftware?.en_abstract_s?.[0] ?? halRawSoftware.abstract_s?.[0] ?? "-",
                "fr": halRawSoftware?.fr_abstract_s?.[0] ?? halRawSoftware.en_abstract_s?.[0] // TODO why fall back to English rather than the default abstract?
            },
            isLibreSoftware: halRawSoftware.openAccess_bool,
            // Optional
            logoUrl: undefined,
            framaLibreId: undefined,
            websiteUrl: halRawSoftware.uri_s,
            sourceUrl: halRawSoftware?.softCodeRepository_s?.[0],
            documentationUrl: undefined, // TODO no info about documentation in HAL check on SWH or Repo ?
            license,
            softwareVersion: halRawSoftware?.softVersion_s?.[0],
            keywords: halRawSoftware?.keyword_s,
            programmingLanguages: halRawSoftware?.softProgrammingLanguage_s,
            applicationCategories: sciencesCategories,
            publicationTime: halRawSoftware?.modifiedDate_tdate
                ? new Date(halRawSoftware?.modifiedDate_tdate)
                : undefined
        };
    },
    {
        "promise": true,
        "maxAge": 3 * 3600 * 1000
    }
);
Loading

0 comments on commit 2b1a533

Please sign in to comment.