Skip to content

Commit

Permalink
#160 automated import (#176)
Browse files Browse the repository at this point in the history
Automatically import software data from HAL (optional; enabled via env config)
resolve #160
  • Loading branch information
guillermau authored Nov 22, 2024
1 parent 3895422 commit e89dd00
Show file tree
Hide file tree
Showing 12 changed files with 159 additions and 16 deletions.
4 changes: 3 additions & 1 deletion api/.env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ export CONFIGURATION=$(cat << EOF
"githubWebhookSecret": "$SILL_WEBHOOK_SECRET",
"port": $SILL_API_PORT,
"isDevEnvironnement": $SILL_IS_DEV_ENVIRONNEMENT,
"externalSoftwareDataOrigin": $SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN
"externalSoftwareDataOrigin": $SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN,
"initializeSoftwareFromSource": $INIT_SOFT_FROM_SOURCE,
"botAgentEmail": $BOT_AGENT_EMAIL,
}
EOF
)
Expand Down
4 changes: 3 additions & 1 deletion api/scripts/compile-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ import { env } from "../src/env";
"githubPersonalAccessTokenForApiRateLimit": env.githubPersonalAccessTokenForApiRateLimit,
"doPerPerformPeriodicalCompilation": false,
"doPerformCacheInitialization": false,
"externalSoftwareDataOrigin": env.externalSoftwareDataOrigin
"externalSoftwareDataOrigin": env.externalSoftwareDataOrigin,
"initializeSoftwareFromSource": env.initializeSoftwareFromSource,
"botAgentEmail": env.botAgentEmail
});

await useCases.fetchAndSaveExternalDataForAllSoftwares();
Expand Down
25 changes: 23 additions & 2 deletions api/src/core/adapters/hal/getHalSoftware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ export async function fetchHalSoftwareById(halDocid: string): Promise<HalRawSoft
`https://api.archives-ouvertes.fr/search/?q=docid:${halDocid}&wt=json&fl=${halSoftwareFieldsToReturnAsString}&sort=docid%20asc`
).catch(() => undefined);

console.log("Hal response status : ", res?.status);

if (res === undefined) {
throw new HalFetchError(undefined);
}
Expand All @@ -59,3 +57,26 @@ export async function fetchHalSoftwareById(halDocid: string): Promise<HalRawSoft

return json.response.docs[0];
}

/**
 * Fetches the HAL records of type SOFTWARE that carry a non-empty `swhidId_s`.
 *
 * The request is retried with exponential backoff while HAL answers 429
 * (rate limited), up to a bounded number of attempts, instead of recursing
 * without limit.
 *
 * @returns The raw HAL documents, or an empty array when the response holds no `docs`.
 * @throws HalFetchError With `undefined` when the request itself fails (network error),
 *         or with the HTTP status when HAL answers with any non-success status
 *         (including 429 once the retries are exhausted).
 */
export async function fetchHalSoftwares(): Promise<Array<HalRawSoftware>> {
    // Filter only software who have an swhidId to filter clean data on https://hal.science, TODO remove and set it as an option to be generic
    const url = `https://api.archives-ouvertes.fr/search/?q=docType_s:SOFTWARE&rows=10000&fl=${halSoftwareFieldsToReturnAsString}&fq=swhidId_s:["" TO *]`;

    const maxAttempts = 5;
    const baseDelayMs = 100;

    for (let attempt = 1; ; attempt++) {
        const res = await fetch(url).catch(err => {
            console.error(err);
            throw new HalFetchError(undefined);
        });

        // Rate limited: wait (100ms, 200ms, 400ms, ...) and try again while attempts remain.
        if (res.status === 429 && attempt < maxAttempts) {
            await new Promise(resolve => setTimeout(resolve, baseDelayMs * 2 ** (attempt - 1)));
            continue;
        }

        // Any non-2xx answer (404, 5xx, exhausted 429, ...) is reported with its status
        // rather than falling through to a JSON parsing failure.
        if (!res.ok) {
            throw new HalFetchError(res.status);
        }

        const json = await res.json();

        return json.response?.docs ?? [];
    }
}
50 changes: 45 additions & 5 deletions api/src/core/adapters/hal/halRawSoftware.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Language, SoftwareExternalData } from "../../ports/GetSoftwareExternalData";
import { SoftwareExternalDataOption } from "../../ports/GetSoftwareExternalDataOptions";
import { SoftwareFormData } from "../../usecases/readWriteSillData";
import { parseBibliographicFields } from "./parseBibliographicFields";

const halSoftwareFieldsToReturn: (keyof HalRawSoftware)[] = [
Expand All @@ -20,11 +21,15 @@ export const halSoftwareFieldsToReturnAsString = halSoftwareFieldsToReturn.join(

export const rawHalSoftwareToSoftwareExternalData = (halSoftware: HalRawSoftware): SoftwareExternalData => {
const bibliographicReferences = parseBibliographicFields(halSoftware.label_bibtex);
const license = bibliographicReferences.license.join(", ");
const developers = bibliographicReferences.author.map(author => ({
id: author.toLowerCase().split(" ").join("-"),
name: author
}));
const license = bibliographicReferences?.license?.join(", ");

const developers =
bibliographicReferences && bibliographicReferences.author
? bibliographicReferences.author.map(author => ({
id: author.toLowerCase().split(" ").join("-"),
name: author
}))
: [];

return {
externalId: halSoftware.docid,
Expand Down Expand Up @@ -214,4 +219,39 @@ export type HalRawSoftware = {
// _version_: bigint;
// dateLastIndexed_tdate: string;
// label_xml: string;
// softCodeRepository_s: string[];
// softDevelopmentStatus_s: string[];
// softPlatform_s:string[];
// softProgrammingLanguage_s: string[];
// softRuntimePlatform_s: string[];
// softVersion_s: string[];
// licence_s: string[];
};

/**
 * Converts a raw HAL software record into the form payload used to create a software entry.
 *
 * Only the name, description, external id and license are derived from the HAL record;
 * every other field is a hard-coded placeholder until a real mapping exists (see TODOs).
 *
 * @param halSoftware Raw document as returned by the HAL search API.
 * @returns Form data describing the software.
 */
export const halRawSoftwareToSoftwareForm = (halSoftware: HalRawSoftware): SoftwareFormData => {
    const bibliographicReferences = parseBibliographicFields(halSoftware.label_bibtex);
    const license = bibliographicReferences?.license?.join(", ");

    // TODO Mapping
    const formData: SoftwareFormData = {
        softwareName: halSoftware.title_s[0],
        // Falls back to "" both when the abstract is missing and when the array is empty.
        softwareDescription: halSoftware.abstract_s?.[0] ?? "",
        softwareType: {
            type: "desktop/mobile",
            os: { "linux": true, "windows": false, "android": false, "ios": false, "mac": false }
        }, // TODO
        externalId: halSoftware.docid,
        comptoirDuLibreId: undefined,
        // `||` on purpose: an empty joined license string must also fall back.
        softwareLicense: license || "copyright", // TODO
        softwareMinimalVersion: "1", // TODO
        similarSoftwareExternalDataIds: [],
        softwareLogoUrl: "https://www.gnu.org/graphics/gnu-head-30-years-anniversary.svg",
        softwareKeywords: [],

        isPresentInSupportContract: false,
        isFromFrenchPublicService: false, // TODO comment
        doRespectRgaa: null
    };

    return formData;
};
7 changes: 7 additions & 0 deletions api/src/core/adapters/hal/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { fetchHalSoftwares } from "./getHalSoftware";

/**
 * Entry point grouping the HAL API calls by resource.
 * `software.getAll` is `fetchHalSoftwares` from `./getHalSoftware`.
 */
export const halAPIGateway = {
software: {
getAll: fetchHalSoftwares
}
};
22 changes: 21 additions & 1 deletion api/src/core/bootstrap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import type { UserApi } from "./ports/UserApi";
import { UseCases } from "./usecases";
import { makeGetAgent } from "./usecases/getAgent";
import { makeGetSoftwareFormAutoFillDataFromExternalAndOtherSources } from "./usecases/getSoftwareFormAutoFillDataFromExternalAndOtherSources";
import { importFromHALSource } from "./usecases/importFromSource";

type PgDbConfig = { dbKind: "kysely"; kyselyDb: Kysely<Database> };

Expand All @@ -33,6 +34,8 @@ type ParamsOfBootstrapCore = {
doPerPerformPeriodicalCompilation: boolean;
doPerformCacheInitialization: boolean;
externalSoftwareDataOrigin: ExternalDataOrigin;
initializeSoftwareFromSource: boolean;
botAgentEmail: string;
};

export type Context = {
Expand Down Expand Up @@ -64,7 +67,9 @@ export async function bootstrapCore(
githubPersonalAccessTokenForApiRateLimit,
doPerPerformPeriodicalCompilation,
doPerformCacheInitialization,
externalSoftwareDataOrigin
externalSoftwareDataOrigin,
initializeSoftwareFromSource,
botAgentEmail
} = params;

const { getSoftwareLatestVersion } = createGetSoftwareLatestVersion({
Expand Down Expand Up @@ -113,6 +118,21 @@ export async function bootstrapCore(
await initializeUserApiCache();
}

if (initializeSoftwareFromSource) {
if (externalSoftwareDataOrigin === "HAL") {
console.log(" ------ Feeding database with HAL software started ------");
const importHAL = importFromHALSource(dbApi);
try {
await importHAL(botAgentEmail);
} catch (err) {
// catches errors both in fetch and response.json
console.error(err);
}

console.log(" ------ Feeding database with HAL software finished ------");
}
}

if (doPerPerformPeriodicalCompilation) {
const frequencyOfUpdate = 1000 * 60 * 60 * 4; // 4 hours

Expand Down
37 changes: 37 additions & 0 deletions api/src/core/usecases/importFromSource.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { DbApiV2 } from "../ports/DbApiV2";
import { halAPIGateway } from "../adapters/hal";
import { halRawSoftwareToSoftwareForm } from "../adapters/hal/halRawSoftware";

/**
 * Builds the "import from HAL" use case bound to the given database API.
 *
 * The returned function:
 *  1. looks up the agent with the given email, creating it when it does not exist;
 *  2. fetches all HAL software and all software already stored in the database;
 *  3. creates a database entry for every HAL record whose name is not already stored.
 *
 * @param dbApi Database API used to read/create agents and software.
 * @returns A function taking the bot agent email and resolving to one promise per HAL
 *          record (in HAL order). Each promise yields the id of the already-stored
 *          software with the same name, or the result of `dbApi.software.create`.
 *          All promises are settled by the time the array is returned, so awaiting the
 *          call really waits for the import; failed imports are logged and remain
 *          observable as rejected promises in the array.
 */
export const importFromHALSource: (dbApi: DbApiV2) => (agentEmail: string) => Promise<Promise<number | undefined>[]> = (
    dbApi: DbApiV2
) => {
    return async (agentEmail: string) => {
        const agent = await dbApi.agent.getByEmail(agentEmail);
        const agentId = agent
            ? agent.id
            : await dbApi.agent.add({
                  email: agentEmail,
                  "isPublic": false,
                  organization: "",
                  about: "This is an bot user created to import data."
              });

        const softwares = await halAPIGateway.software.getAll();
        const dbSoftwares = await dbApi.software.getAll();

        // Name -> id lookup; the first occurrence wins, like `indexOf` on the name list did.
        const softwareIdByName = new Map<string, number>();
        for (const { softwareName, softwareId } of dbSoftwares) {
            if (!softwareIdByName.has(softwareName)) {
                softwareIdByName.set(softwareName, softwareId);
            }
        }

        const imports = softwares.map(async software => {
            const newSoft = halRawSoftwareToSoftwareForm(software);
            const existingId = softwareIdByName.get(newSoft.softwareName);

            if (existingId !== undefined) {
                return existingId;
            }

            console.log("Importing HAL : ", software.docid);
            return dbApi.software.create({ formData: newSoft, externalDataOrigin: "HAL", agentId: agentId });
        });

        // Wait for every import to settle so callers awaiting this function do not move on
        // while creations are still running, and so rejections never go unhandled.
        const outcomes = await Promise.allSettled(imports);
        outcomes.forEach((outcome, position) => {
            if (outcome.status === "rejected") {
                console.error("HAL import failed for docid", softwares[position].docid, outcome.reason);
            }
        });

        return imports;
    };
};
4 changes: 2 additions & 2 deletions api/src/core/usecases/readWriteSillData/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,11 @@ export type Prerogative = keyof Prerogatives;
export type Os = "windows" | "linux" | "mac" | "android" | "ios";

export type SoftwareFormData = {
softwareName: string;
softwareDescription: string;
softwareType: SoftwareType;
externalId: string | undefined;
comptoirDuLibreId: number | undefined;
softwareName: string;
softwareDescription: string;
softwareLicense: string;
softwareMinimalVersion: string;
similarSoftwareExternalDataIds: string[];
Expand Down
8 changes: 6 additions & 2 deletions api/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ const zConfiguration = z.object({
// Completely disable this instance and redirect to another url
"redirectUrl": z.string().optional(),
"externalSoftwareDataOrigin": z.enum(["wikidata", "HAL"]).optional(),
"databaseUrl": z.string()
"databaseUrl": z.string(),
"initializeSoftwareFromSource": z.boolean(),
"botAgentEmail": z.string()
});

const getJsonConfiguration = () => {
Expand Down Expand Up @@ -65,7 +67,9 @@ const getJsonConfiguration = () => {
"isDevEnvironnement": process.env.SILL_IS_DEV_ENVIRONNEMENT?.toLowerCase() === "true",
"externalSoftwareDataOrigin": process.env.SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN,
"redirectUrl": process.env.SILL_REDIRECT_URL,
"databaseUrl": process.env.DATABASE_URL
"databaseUrl": process.env.DATABASE_URL,
"initializeSoftwareFromSource": process.env.INIT_SOFT_FROM_SOURCE?.toLowerCase() === "true",
"botAgentEmail": process.env?.BOT_AGENT_EMAIL
};
};

Expand Down
4 changes: 3 additions & 1 deletion api/src/rpc/createTestCaller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ export const createTestCaller = async ({ user }: TestCallerConfig = { user: defa
"githubPersonalAccessTokenForApiRateLimit": "fake-token",
"doPerPerformPeriodicalCompilation": false,
"doPerformCacheInitialization": false,
"externalSoftwareDataOrigin": externalSoftwareDataOrigin
"externalSoftwareDataOrigin": externalSoftwareDataOrigin,
"botAgentEmail": "[email protected]",
"initializeSoftwareFromSource": false
});

const jwtClaimByUserKey = {
Expand Down
8 changes: 7 additions & 1 deletion api/src/rpc/start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ export async function startRpcService(params: {
externalSoftwareDataOrigin: ExternalDataOrigin;
redirectUrl?: string;
databaseUrl: string;
initializeSoftwareFromSource: boolean;
botAgentEmail: string;
}) {
const {
redirectUrl,
Expand All @@ -53,6 +55,8 @@ export async function startRpcService(params: {
isDevEnvironnement,
externalSoftwareDataOrigin,
databaseUrl,
botAgentEmail,
initializeSoftwareFromSource,
...rest
} = params;

Expand Down Expand Up @@ -84,7 +88,9 @@ export async function startRpcService(params: {
"doPerPerformPeriodicalCompilation": true,
// "doPerPerformPeriodicalCompilation": !isDevEnvironnement && redirectUrl === undefined,
"doPerformCacheInitialization": redirectUrl === undefined,
"externalSoftwareDataOrigin": externalSoftwareDataOrigin
"externalSoftwareDataOrigin": externalSoftwareDataOrigin,
"botAgentEmail": botAgentEmail,
"initializeSoftwareFromSource": initializeSoftwareFromSource
});

console.log("Core API initialized");
Expand Down
2 changes: 2 additions & 0 deletions deployments/docker-compose-example/.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ SILL_GITHUB_TOKEN=xxxxx
SILL_API_PORT=3084
SILL_IS_DEV_ENVIRONNEMENT=true
SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN=wikidata
INIT_SOFT_FROM_SOURCE=false
BOT_AGENT_EMAIL=[email protected]

DATABASE_URL=postgresql://sill:pg_password@localhost:5432/sill
POSTGRES_DB=sill
Expand Down

0 comments on commit e89dd00

Please sign in to comment.