diff --git a/api/.env.sh b/api/.env.sh index c235e2eb..a48d7def 100755 --- a/api/.env.sh +++ b/api/.env.sh @@ -23,7 +23,9 @@ export CONFIGURATION=$(cat << EOF "githubWebhookSecret": "$SILL_WEBHOOK_SECRET", "port": $SILL_API_PORT, "isDevEnvironnement": $SILL_IS_DEV_ENVIRONNEMENT, - "externalSoftwareDataOrigin": $SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN + "externalSoftwareDataOrigin": $SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN, + "initializeSoftwareFromSource": $INIT_SOFT_FROM_SOURCE, + "botAgentEmail": $BOT_AGENT_EMAIL, } EOF ) diff --git a/api/scripts/compile-data.ts b/api/scripts/compile-data.ts index 5b6415ad..e0270b91 100644 --- a/api/scripts/compile-data.ts +++ b/api/scripts/compile-data.ts @@ -15,7 +15,9 @@ import { env } from "../src/env"; "githubPersonalAccessTokenForApiRateLimit": env.githubPersonalAccessTokenForApiRateLimit, "doPerPerformPeriodicalCompilation": false, "doPerformCacheInitialization": false, - "externalSoftwareDataOrigin": env.externalSoftwareDataOrigin + "externalSoftwareDataOrigin": env.externalSoftwareDataOrigin, + "initializeSoftwareFromSource": env.initializeSoftwareFromSource, + "botAgentEmail": env.botAgentEmail }); await useCases.fetchAndSaveExternalDataForAllSoftwares(); diff --git a/api/src/core/adapters/hal/getHalSoftware.ts b/api/src/core/adapters/hal/getHalSoftware.ts index cb4f6da4..56192e12 100644 --- a/api/src/core/adapters/hal/getHalSoftware.ts +++ b/api/src/core/adapters/hal/getHalSoftware.ts @@ -40,8 +40,6 @@ export async function fetchHalSoftwareById(halDocid: string): Promise undefined); - console.log("Hal response status : ", res?.status); - if (res === undefined) { throw new HalFetchError(undefined); } @@ -59,3 +57,26 @@ export async function fetchHalSoftwareById(halDocid: string): Promise> { + // Filter only software who have an swhidId to filter clean data on https://hal.science, TODO remove and set it as an option to be generic + const url = 
`https://api.archives-ouvertes.fr/search/?q=docType_s:SOFTWARE&rows=10000&fl=${halSoftwareFieldsToReturnAsString}&fq=swhidId_s:["" TO *]`; + + const res = await fetch(url).catch(err => { + console.error(err); + throw new HalFetchError(undefined); + }); + + if (res.status === 429) { + await new Promise(resolve => setTimeout(resolve, 100)); + return fetchHalSoftwares(); + } + + if (res.status === 404) { + throw new HalFetchError(res.status); + } + + const json = await res.json(); + + return json.response.docs; +} diff --git a/api/src/core/adapters/hal/halRawSoftware.ts b/api/src/core/adapters/hal/halRawSoftware.ts index fa233485..3190708b 100644 --- a/api/src/core/adapters/hal/halRawSoftware.ts +++ b/api/src/core/adapters/hal/halRawSoftware.ts @@ -1,5 +1,6 @@ import { Language, SoftwareExternalData } from "../../ports/GetSoftwareExternalData"; import { SoftwareExternalDataOption } from "../../ports/GetSoftwareExternalDataOptions"; +import { SoftwareFormData } from "../../usecases/readWriteSillData"; import { parseBibliographicFields } from "./parseBibliographicFields"; const halSoftwareFieldsToReturn: (keyof HalRawSoftware)[] = [ @@ -20,11 +21,15 @@ export const halSoftwareFieldsToReturnAsString = halSoftwareFieldsToReturn.join( export const rawHalSoftwareToSoftwareExternalData = (halSoftware: HalRawSoftware): SoftwareExternalData => { const bibliographicReferences = parseBibliographicFields(halSoftware.label_bibtex); - const license = bibliographicReferences.license.join(", "); - const developers = bibliographicReferences.author.map(author => ({ - id: author.toLowerCase().split(" ").join("-"), - name: author - })); + const license = bibliographicReferences?.license?.join(", "); + + const developers = + bibliographicReferences && bibliographicReferences.author + ? 
bibliographicReferences.author.map(author => ({
+                  id: author.toLowerCase().split(" ").join("-"),
+                  name: author
+              }))
+            : [];
 
     return {
         externalId: halSoftware.docid,
@@ -214,4 +219,50 @@ export type HalRawSoftware = {
     // _version_: bigint;
     // dateLastIndexed_tdate: string;
     // label_xml: string;
+    // softCodeRepository_s: string[];
+    // softDevelopmentStatus_s: string[];
+    // softPlatform_s:string[];
+    // softProgrammingLanguage_s: string[];
+    // softRuntimePlatform_s: string[];
+    // softVersion_s: string[];
+    // licence_s: string[];
+};
+
+/**
+ * Maps a raw HAL software record to the form data used to create a software entry.
+ *
+ * Only the name, description, external id and license are taken from the HAL
+ * record; every other field is a hard-coded placeholder until the mapping is done.
+ *
+ * @param halSoftware Raw HAL software record.
+ * @returns Form data built from `halSoftware`.
+ */
+export const halRawSoftwareToSoftwareForm = (halSoftware: HalRawSoftware): SoftwareFormData => {
+    const bibliographicReferences = parseBibliographicFields(halSoftware.label_bibtex);
+    const license = bibliographicReferences?.license?.join(", ");
+
+    // TODO Mapping
+    const formData: SoftwareFormData = {
+        softwareName: halSoftware.title_s[0],
+        // abstract_s can be absent or empty; fall back to "" so the field is always a string
+        softwareDescription: halSoftware.abstract_s?.[0] ?? "",
+        softwareType: {
+            type: "desktop/mobile",
+            os: { "linux": true, "windows": false, "android": false, "ios": false, "mac": false }
+        }, // TODO
+        externalId: halSoftware.docid,
+        comptoirDuLibreId: undefined,
+        // `||` on purpose: an empty joined license string must also fall back
+        softwareLicense: license || "copyright", // TODO
+        softwareMinimalVersion: "1", // TODO
+        similarSoftwareExternalDataIds: [],
+        softwareLogoUrl: "https://www.gnu.org/graphics/gnu-head-30-years-anniversary.svg",
+        softwareKeywords: [],
+
+        isPresentInSupportContract: false,
+        isFromFrenchPublicService: false, // TODO comment
+        doRespectRgaa: null
+    };
+
+    return formData;
 };
diff --git a/api/src/core/adapters/hal/index.ts b/api/src/core/adapters/hal/index.ts
new file mode 100644
index 00000000..ac783cf0
--- /dev/null
+++ b/api/src/core/adapters/hal/index.ts
@@ -0,0 +1,7 @@
+import { fetchHalSoftwares } from "./getHalSoftware";
+
+export const halAPIGateway = {
+    software: {
+        getAll: fetchHalSoftwares
+    }
+};
diff --git a/api/src/core/bootstrap.ts b/api/src/core/bootstrap.ts
index c4b0dcb8..aaf2803e
100644
--- a/api/src/core/bootstrap.ts
+++ b/api/src/core/bootstrap.ts
@@ -21,6 +21,7 @@ import type { UserApi } from "./ports/UserApi";
 import { UseCases } from "./usecases";
 import { makeGetAgent } from "./usecases/getAgent";
 import { makeGetSoftwareFormAutoFillDataFromExternalAndOtherSources } from "./usecases/getSoftwareFormAutoFillDataFromExternalAndOtherSources";
+import { importFromHALSource } from "./usecases/importFromSource";
 
 type PgDbConfig = { dbKind: "kysely"; kyselyDb: Kysely };
 
@@ -33,6 +34,8 @@ type ParamsOfBootstrapCore = {
     doPerPerformPeriodicalCompilation: boolean;
     doPerformCacheInitialization: boolean;
     externalSoftwareDataOrigin: ExternalDataOrigin;
+    initializeSoftwareFromSource: boolean;
+    botAgentEmail: string;
 };
 
 export type Context = {
@@ -64,7 +67,9 @@ export async function bootstrapCore(
         githubPersonalAccessTokenForApiRateLimit,
         doPerPerformPeriodicalCompilation,
         doPerformCacheInitialization,
-        externalSoftwareDataOrigin
+        externalSoftwareDataOrigin,
+        initializeSoftwareFromSource,
+        botAgentEmail
     } = params;
 
     const { getSoftwareLatestVersion } = createGetSoftwareLatestVersion({
@@ -113,6 +118,21 @@ export async function bootstrapCore(
         await initializeUserApiCache();
     }
 
+    if (initializeSoftwareFromSource) {
+        if (externalSoftwareDataOrigin === "HAL") {
+            console.log(" ------ Feeding database with HAL software started ------");
+            const importHAL = importFromHALSource(dbApi);
+            try {
+                await importHAL(botAgentEmail);
+            } catch (err) {
+                // catches errors both in fetch and response.json
+                console.error(err);
+            }
+
+            console.log(" ------ Feeding database with HAL software finished ------");
+        }
+    }
+
     if (doPerPerformPeriodicalCompilation) {
         const frequencyOfUpdate = 1000 * 60 * 60 * 4; // 4 hours
 
diff --git a/api/src/core/usecases/importFromSource.ts b/api/src/core/usecases/importFromSource.ts
new file mode 100644
index 00000000..a5ab5f22
--- /dev/null
+++ b/api/src/core/usecases/importFromSource.ts
@@ -0,0 +1,59 @@
+import { DbApiV2 } from "../ports/DbApiV2";
+import { halAPIGateway } from "../adapters/hal";
+import { halRawSoftwareToSoftwareForm } from "../adapters/hal/halRawSoftware";
+
+/**
+ * Builds the use case that imports HAL software into the database.
+ *
+ * The returned function looks up the agent registered under `agentEmail`
+ * (adding a non-public one when none is found), fetches the HAL software list
+ * and creates every entry whose name is not already stored. It resolves only
+ * once every lookup/insert has completed and rejects if any of them fails.
+ *
+ * @param dbApi Database port used to read and create agents and software.
+ * @returns A function that takes the importing agent's email and resolves to
+ *          one promise per HAL entry, holding either the id of the software
+ *          already stored under that name or the result of
+ *          `dbApi.software.create`.
+ */
+export const importFromHALSource: (dbApi: DbApiV2) => (agentEmail: string) => Promise[]> = (
+    dbApi: DbApiV2
+) => {
+    return async (agentEmail: string) => {
+        const agent = await dbApi.agent.getByEmail(agentEmail);
+        const agentId = agent
+            ? agent.id
+            : await dbApi.agent.add({
+                  email: agentEmail,
+                  "isPublic": false,
+                  organization: "",
+                  about: "This is an bot user created to import data."
+              });
+
+        const softwares = await halAPIGateway.software.getAll();
+        const dbSoftwares = await dbApi.software.getAll();
+        const dbSoftwaresNames = dbSoftwares.map(software => {
+            return software.softwareName;
+        });
+
+        const imports = softwares.map(async software => {
+            const newSoft = halRawSoftwareToSoftwareForm(software);
+            const index = dbSoftwaresNames.indexOf(newSoft.softwareName);
+
+            if (index !== -1) {
+                return dbSoftwares[index].softwareId;
+            } else {
+                console.log("Importing HAL : ", software.docid);
+                return dbApi.software.create({ formData: newSoft, externalDataOrigin: "HAL", agentId: agentId });
+            }
+        });
+
+        // Settle every lookup/insert before resolving: callers only await the
+        // outer promise, so without this the import would still be running
+        // (and a failed insert would be an unhandled rejection) after they
+        // believe it has finished.
+        await Promise.all(imports);
+
+        return imports;
+    };
+};
diff --git a/api/src/core/usecases/readWriteSillData/types.ts b/api/src/core/usecases/readWriteSillData/types.ts
index 2cf6849a..3258915a 100644
--- a/api/src/core/usecases/readWriteSillData/types.ts
+++ b/api/src/core/usecases/readWriteSillData/types.ts
@@ -124,11 +124,11 @@ export type Prerogative = keyof Prerogatives;
 export type Os = "windows" | "linux" | "mac" | "android" | "ios";
 
 export type SoftwareFormData = {
+    softwareName: string;
+    softwareDescription: string;
     softwareType: SoftwareType;
     externalId: string | undefined;
     comptoirDuLibreId: number | undefined;
-    softwareName: string;
-    softwareDescription: string;
     softwareLicense: string;
     softwareMinimalVersion: string;
     similarSoftwareExternalDataIds: string[];
diff --git a/api/src/env.ts b/api/src/env.ts
index 206a4672..c9a1b87c 100644
--- 
a/api/src/env.ts +++ b/api/src/env.ts @@ -28,7 +28,9 @@ const zConfiguration = z.object({ // Completely disable this instance and redirect to another url "redirectUrl": z.string().optional(), "externalSoftwareDataOrigin": z.enum(["wikidata", "HAL"]).optional(), - "databaseUrl": z.string() + "databaseUrl": z.string(), + "initializeSoftwareFromSource": z.boolean(), + "botAgentEmail": z.string() }); const getJsonConfiguration = () => { @@ -65,7 +67,9 @@ const getJsonConfiguration = () => { "isDevEnvironnement": process.env.SILL_IS_DEV_ENVIRONNEMENT?.toLowerCase() === "true", "externalSoftwareDataOrigin": process.env.SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN, "redirectUrl": process.env.SILL_REDIRECT_URL, - "databaseUrl": process.env.DATABASE_URL + "databaseUrl": process.env.DATABASE_URL, + "initializeSoftwareFromSource": process.env.INIT_SOFT_FROM_SOURCE?.toLowerCase() === "true", + "botAgentEmail": process.env?.BOT_AGENT_EMAIL }; }; diff --git a/api/src/rpc/createTestCaller.ts b/api/src/rpc/createTestCaller.ts index 6fbfbd15..15804b3b 100644 --- a/api/src/rpc/createTestCaller.ts +++ b/api/src/rpc/createTestCaller.ts @@ -31,7 +31,9 @@ export const createTestCaller = async ({ user }: TestCallerConfig = { user: defa "githubPersonalAccessTokenForApiRateLimit": "fake-token", "doPerPerformPeriodicalCompilation": false, "doPerformCacheInitialization": false, - "externalSoftwareDataOrigin": externalSoftwareDataOrigin + "externalSoftwareDataOrigin": externalSoftwareDataOrigin, + "botAgentEmail": "bot@mydomain.fr", + "initializeSoftwareFromSource": false }); const jwtClaimByUserKey = { diff --git a/api/src/rpc/start.ts b/api/src/rpc/start.ts index 8e7b8c91..36ec0bed 100644 --- a/api/src/rpc/start.ts +++ b/api/src/rpc/start.ts @@ -41,6 +41,8 @@ export async function startRpcService(params: { externalSoftwareDataOrigin: ExternalDataOrigin; redirectUrl?: string; databaseUrl: string; + initializeSoftwareFromSource: boolean; + botAgentEmail: string; }) { const { redirectUrl, @@ -53,6 +55,8 
@@ export async function startRpcService(params: { isDevEnvironnement, externalSoftwareDataOrigin, databaseUrl, + botAgentEmail, + initializeSoftwareFromSource, ...rest } = params; @@ -84,7 +88,9 @@ export async function startRpcService(params: { "doPerPerformPeriodicalCompilation": true, // "doPerPerformPeriodicalCompilation": !isDevEnvironnement && redirectUrl === undefined, "doPerformCacheInitialization": redirectUrl === undefined, - "externalSoftwareDataOrigin": externalSoftwareDataOrigin + "externalSoftwareDataOrigin": externalSoftwareDataOrigin, + "botAgentEmail": botAgentEmail, + "initializeSoftwareFromSource": initializeSoftwareFromSource }); console.log("Core API initialized"); diff --git a/deployments/docker-compose-example/.env.sample b/deployments/docker-compose-example/.env.sample index 1b928705..f6cc9171 100644 --- a/deployments/docker-compose-example/.env.sample +++ b/deployments/docker-compose-example/.env.sample @@ -12,6 +12,8 @@ SILL_GITHUB_TOKEN=xxxxx SILL_API_PORT=3084 SILL_IS_DEV_ENVIRONNEMENT=true SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN=wikidata +INIT_SOFT_FROM_SOURCE=false +BOT_AGENT_EMAIL=contact@sill.codegouv.fr DATABASE_URL=postgresql://sill:pg_password@localhost:5432/sill POSTGRES_DB=sill