Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#215 : functions to import software from wikidata #234

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions api/src/core/adapters/wikidata/getSoftwareForm.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { SoftwareFormData } from "../../usecases/readWriteSillData";
import { createGetClaimDataValue, fetchEntity, WikidataFetchError } from "./getWikidataSoftware";

/**
 * Builds the software form data describing a Wikidata entity.
 *
 * Fetches the entity `wikidataId` and, when claim P275 (license) is set, the license entity,
 * then maps the result to a `SoftwareFormData`.
 *
 * @param wikidataId Wikidata entity id, e.g. "Q10135".
 * @returns The form data, or `undefined` when the entity cannot be fetched, has no label in any
 *          language, or an unexpected error occurs. Errors are logged and never propagated.
 */
export const getWikidataForm = async (wikidataId: string): Promise<SoftwareFormData | undefined> => {
    try {
        console.info(` -> fetching wiki soft : ${wikidataId}`);
        const fetched = await fetchEntity(wikidataId).catch(error => {
            // A missing entity (404) or a fetch error without status means "nothing to import".
            if (error instanceof WikidataFetchError && (error.status === 404 || error.status === undefined)) {
                return undefined;
            }
            // Anything else is unexpected: let the outer catch log it instead of dropping it silently.
            throw error;
        });

        if (fetched === undefined) {
            return undefined;
        }

        const { entity } = fetched;

        const { getClaimDataValue } = createGetClaimDataValue({ entity });

        // P154: logo image, given as a Wikimedia Commons file name.
        const logoName = getClaimDataValue<"string">("P154")[0];

        const license = await (async () => {
            // P275: license of the software.
            const licenseId = getClaimDataValue<"wikibase-entityid">("P275")[0]?.id;

            if (licenseId === undefined) {
                return undefined;
            }

            console.info(`I -> fetching wiki license : ${licenseId}`);
            // The license is optional information: a failed fetch only drops the label.
            const { entity: licenseEntity } = await fetchEntity(licenseId).catch(() => ({ "entity": undefined }));

            if (licenseEntity === undefined) {
                return undefined;
            }

            return { "label": licenseEntity.aliases.en?.[0]?.value, "id": licenseId };
        })();

        // Prefer English, then French, then whichever language comes first.
        const name =
            entity.labels?.en?.value ??
            entity.labels?.fr?.value ??
            entity.labels?.[Object.keys(entity.labels)[0]]?.value;

        if (name === undefined) {
            console.error(`Error for ${wikidataId} : entity has no label in any language`);
            return undefined;
        }

        const description =
            entity.descriptions?.en?.value ??
            entity.descriptions?.fr?.value ??
            entity.descriptions?.[Object.keys(entity.descriptions)[0]]?.value ??
            "";

        // Special:FilePath redirects to the actual file, so the hash directories used by
        // upload.wikimedia.org (which depend on the file name) do not have to be guessed.
        const softwareLogoUrl =
            logoName === undefined
                ? undefined
                : `https://commons.wikimedia.org/wiki/Special:FilePath/${encodeURIComponent(
                      logoName.replace(/ /g, "_")
                  )}`;

        return {
            softwareName: name,
            softwareDescription: description,
            softwareType: {
                // TODO: derive from claim P306 (operating system); hard-coded for now.
                type: "desktop/mobile",
                os: { "linux": true, "windows": true, "android": false, "ios": false, "mac": false }
            },
            externalId: wikidataId,
            comptoirDuLibreId: undefined,
            softwareLicense: license?.label ?? "Copyright",
            softwareMinimalVersion: undefined,
            similarSoftwareExternalDataIds: [],
            softwareLogoUrl,
            softwareKeywords: [],
            isPresentInSupportContract: false,
            isFromFrenchPublicService: false,
            doRespectRgaa: false
        };
    } catch (error) {
        // Best effort: one broken entity must not abort a whole import.
        console.error(`Error for ${wikidataId} : `, error);
        return undefined;
    }
};
4 changes: 2 additions & 2 deletions api/src/core/adapters/wikidata/getWikidataSoftware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ export class WikidataFetchError extends Error {
}
}

async function fetchEntity(wikidataId: string): Promise<{ entity: Entity }> {
export async function fetchEntity(wikidataId: string): Promise<{ entity: Entity }> {
const res = await fetch(`https://www.wikidata.org/wiki/Special:EntityData/${wikidataId}.json`).catch(
() => undefined
);
Expand All @@ -310,7 +310,7 @@ async function fetchEntity(wikidataId: string): Promise<{ entity: Entity }> {
return { entity };
}

function createGetClaimDataValue(params: { entity: Entity }) {
export function createGetClaimDataValue(params: { entity: Entity }) {
const { entity } = params;

function getClaimDataValue<Type extends "string" | "wikibase-entityid" | "text-language" | "time">(
Expand Down
17 changes: 15 additions & 2 deletions api/src/core/bootstrap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import type { UserApi } from "./ports/UserApi";
import { UseCases } from "./usecases";
import { makeGetAgent } from "./usecases/getAgent";
import { makeGetSoftwareFormAutoFillDataFromExternalAndOtherSources } from "./usecases/getSoftwareFormAutoFillDataFromExternalAndOtherSources";
import { importFromHALSource } from "./usecases/importFromSource";
import { importFromHALSource, importFromWikidataSource } from "./usecases/importFromSource";

type PgDbConfig = { dbKind: "kysely"; kyselyDb: Kysely<Database> };

Expand All @@ -35,6 +35,7 @@ type ParamsOfBootstrapCore = {
externalSoftwareDataOrigin: ExternalDataOrigin;
initializeSoftwareFromSource: boolean;
botAgentEmail: string | undefined;
listToImport?: string[];
};

export type Context = {
Expand Down Expand Up @@ -68,7 +69,8 @@ export async function bootstrapCore(
doPerformCacheInitialization,
externalSoftwareDataOrigin,
initializeSoftwareFromSource,
botAgentEmail
botAgentEmail,
listToImport
} = params;

const { getSoftwareLatestVersion } = createGetSoftwareLatestVersion({
Expand Down Expand Up @@ -131,6 +133,17 @@ export async function bootstrapCore(

console.log(" ------ Feeding database with HAL software finished ------");
guillermau marked this conversation as resolved.
Show resolved Hide resolved
}
if (externalSoftwareDataOrigin === "wikidata") {
console.log(" ------ Feeding database with Wikidata software started ------");
guillermau marked this conversation as resolved.
Show resolved Hide resolved
if (!botAgentEmail) throw new Error("No bot agent email provided");
const importWikidata = importFromWikidataSource(dbApi);
try {
await importWikidata(botAgentEmail, listToImport ?? []);
} catch (err) {
console.error(err);
}
console.log(" ------ Feeding database with Wikidata software finished ------");
guillermau marked this conversation as resolved.
Show resolved Hide resolved
}
}

if (doPerPerformPeriodicalCompilation) {
Expand Down
38 changes: 38 additions & 0 deletions api/src/core/usecases/importFromSource.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { DbApiV2 } from "../ports/DbApiV2";
import { halAPIGateway } from "../adapters/hal/HalAPI";
import { halRawSoftwareToSoftwareForm } from "../adapters/hal/getSoftwareForm";
import { getWikidataForm } from "../adapters/wikidata/getSoftwareForm";

export const importFromHALSource: (dbApi: DbApiV2) => (agentEmail: string) => Promise<Promise<number | undefined>[]> = (
dbApi: DbApiV2
Expand Down Expand Up @@ -35,3 +36,40 @@ export const importFromHALSource: (dbApi: DbApiV2) => (agentEmail: string) => Pr
});
};
};

/**
 * Creates the use case that imports a list of Wikidata entities as software.
 *
 * The returned function looks up the bot agent by email (creating it when missing), then for
 * each id builds the form data with `getWikidataForm` and either reuses the software already
 * stored under the same name or creates a new one.
 *
 * It resolves only once every import has settled, so awaiting it really waits for the import
 * and failures do not surface as unhandled rejections. Failures are logged; the matching
 * promise in the returned array still rejects for callers that inspect it.
 *
 * @param dbApi Database access used to read and create agents and software.
 * @returns A function `(agentEmail, softwareIds)` resolving to one promise per id, in the same
 *          order: the existing or newly created software id, or `-1` when no form data could
 *          be built for that id.
 */
export const importFromWikidataSource: (
    dbApi: DbApiV2
) => (agentEmail: string, softwareIds: string[]) => Promise<Promise<number | undefined>[]> = (dbApi: DbApiV2) => {
    return async (agentEmail: string, softwareIds: string[]) => {
        const agent = await dbApi.agent.getByEmail(agentEmail);
        const agentId = agent
            ? agent.id
            : await dbApi.agent.add({
                  email: agentEmail,
                  "isPublic": false,
                  organization: "",
                  about: "This is an bot user created to import data."
              });

        const dbSoftwares = await dbApi.software.getAll();

        // Name -> id index; the first software with a given name wins, as with indexOf.
        const softwareIdByName = new Map<string, (typeof dbSoftwares)[number]["softwareId"]>();
        for (const { softwareName, softwareId } of dbSoftwares) {
            if (!softwareIdByName.has(softwareName)) {
                softwareIdByName.set(softwareName, softwareId);
            }
        }

        const imports = softwareIds.map(async (softwareId: string) => {
            const newSoft = await getWikidataForm(softwareId);
            if (!newSoft) {
                return -1;
            }

            if (softwareIdByName.has(newSoft.softwareName)) {
                return softwareIdByName.get(newSoft.softwareName);
            }

            console.log("Importing wikidata : ", softwareId);
            return dbApi.software.create({ formData: newSoft, externalDataOrigin: "wikidata", agentId: agentId });
        });

        // Wait for every import and log failures. The original promises are returned untouched.
        await Promise.all(
            imports.map((pendingImport, position) =>
                pendingImport.catch(error => {
                    console.error(`Import from wikidata failed for ${softwareIds[position]} : `, error);
                })
            )
        );

        return imports;
    };
};
8 changes: 6 additions & 2 deletions api/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ const zConfiguration = z.object({
"externalSoftwareDataOrigin": z.enum(["wikidata", "HAL"]).optional(),
"databaseUrl": z.string(),
"initializeSoftwareFromSource": z.boolean(),
"botAgentEmail": z.string().optional()
"botAgentEmail": z.string().optional(),
"listToImport": z.array(z.string()).optional()
});

const getJsonConfiguration = () => {
Expand Down Expand Up @@ -69,7 +70,10 @@ const getJsonConfiguration = () => {
"redirectUrl": process.env.SILL_REDIRECT_URL,
"databaseUrl": process.env.DATABASE_URL,
"initializeSoftwareFromSource": process.env.INIT_SOFT_FROM_SOURCE?.toLowerCase() === "true",
"botAgentEmail": process.env?.BOT_AGENT_EMAIL
"botAgentEmail": process.env?.BOT_AGENT_EMAIL,
"listToImport": process.env?.SILL_IMPORT_WIKIDATA
? JSON.parse(process.env?.SILL_IMPORT_WIKIDATA)?.ids
: undefined
};
};

Expand Down
5 changes: 4 additions & 1 deletion api/src/rpc/start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export async function startRpcService(params: {
databaseUrl: string;
initializeSoftwareFromSource: boolean;
botAgentEmail?: string;
listToImport?: string[];
}) {
const {
redirectUrl,
Expand All @@ -57,6 +58,7 @@ export async function startRpcService(params: {
databaseUrl,
botAgentEmail,
initializeSoftwareFromSource,
listToImport,
...rest
} = params;

Expand Down Expand Up @@ -90,7 +92,8 @@ export async function startRpcService(params: {
"doPerformCacheInitialization": redirectUrl === undefined,
"externalSoftwareDataOrigin": externalSoftwareDataOrigin,
"botAgentEmail": botAgentEmail,
"initializeSoftwareFromSource": initializeSoftwareFromSource
"initializeSoftwareFromSource": initializeSoftwareFromSource,
"listToImport": listToImport ?? []
});

console.log("Core API initialized");
Expand Down
7 changes: 4 additions & 3 deletions deployments/docker-compose-example/.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ SILL_GITHUB_TOKEN=xxxxx
SILL_API_PORT=3084
SILL_IS_DEV_ENVIRONNEMENT=true
SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN=wikidata
SILL_IMPORT_WIKIDATA={"ids":["Q10135"]}
guillermau marked this conversation as resolved.
Show resolved Hide resolved
INIT_SOFT_FROM_SOURCE=false
[email protected]

DATABASE_URL=postgresql://sill:pg_password@localhost:5432/sill
POSTGRES_DB=sill
POSTGRES_USER=sill
POSTGRES_PASSWORD=pg_password

guillermau marked this conversation as resolved.
Show resolved Hide resolved
# this is only for the script load-git-repo-in-pg.ts, not needed for api any more
[email protected]:codegouvfr/sill-data-test.git
Expand Down
Loading