From e04cb1e3e99c2018215fd2ac9205b332e6cc50e2 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 2 Dec 2024 10:13:26 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9D=20simplify,=20comment=20and=20clea?= =?UTF-8?q?nup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 17 +- ...2626230267-addPostsGdocsComponentsTable.ts | 2 +- db/model/Gdoc/GdocFactory.ts | 6 +- db/model/Gdoc/extractGdocComponentInfo.ts | 353 ++++++++++-------- 4 files changed, 204 insertions(+), 174 deletions(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 32bea3c217..127779666c 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -109,6 +109,7 @@ import { DbPlainChartView, ChartViewsTableName, DbInsertChartView, + PostsGdocsComponentsTableName, CHART_VIEW_PROPS_TO_PERSIST, CHART_VIEW_PROPS_TO_OMIT, } from "@ourworldindata/types" @@ -177,7 +178,6 @@ import { addImagesToContentGraph, updateGdocContentOnly, upsertGdoc, - updateDerivedGdocPostsComponents, } from "../db/model/Gdoc/GdocFactory.js" import { match } from "ts-pattern" import { GdocDataInsight } from "../db/model/Gdoc/GdocDataInsight.js" @@ -2781,11 +2781,6 @@ postRouteWithRWTransaction( gdoc.createdAt = new Date() gdoc.publishedAt = post.published_at await upsertGdoc(trx, gdoc) - await updateDerivedGdocPostsComponents( - trx, - gdoc.id, - gdoc.content.body - ) await setTagsForGdoc(trx, gdocId, tags) } return { googleDocsId: gdocId } @@ -2987,11 +2982,7 @@ putRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => { ) await upsertGdoc(trx, nextGdoc) - await updateDerivedGdocPostsComponents( - trx, - nextGdoc.id, - nextGdoc.content.body - ) + await indexAndBakeGdocIfNeccesary(trx, res.locals.user, prevGdoc, nextGdoc) return nextGdoc @@ -3047,6 +3038,10 @@ deleteRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => { await trx.table(PostsGdocsLinksTableName).where({ sourceId: id }).delete() await trx.table(PostsGdocsXImagesTableName).where({ gdocId: id }).delete() await trx.table(PostsGdocsTableName).where({ id }).delete() + await trx + .table(PostsGdocsComponentsTableName) + .where({ gdocId: id }) + .delete() if (gdoc.published && checkIsGdocPostExcludingFragments(gdoc)) { await removeIndividualGdocPostFromIndex(gdoc) } diff --git a/db/migration/1732626230267-addPostsGdocsComponentsTable.ts b/db/migration/1732626230267-addPostsGdocsComponentsTable.ts index 3bce2998d7..936da8142e 100644 --- a/db/migration/1732626230267-addPostsGdocsComponentsTable.ts +++ b/db/migration/1732626230267-addPostsGdocsComponentsTable.ts @@ -11,7 +11,7 @@ CREATE TABLE posts_gdocs_components ( config JSON, parent VARCHAR(1024), path VARCHAR(1024), - FOREIGN KEY (gdocId) REFERENCES posts_gdocs(id), + FOREIGN KEY (gdocId) REFERENCES posts_gdocs(id) ON DELETE CASCADE ON UPDATE CASCADE, INDEX idx_gdocId (gdocId) ) ENGINE=InnoDB; `) diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts index 1011b06bdf..3fb41561df 100644 --- a/db/model/Gdoc/GdocFactory.ts +++ b/db/model/Gdoc/GdocFactory.ts @@ -128,7 +128,7 @@ export async function createGdocAndInsertIntoDb( // while fetching the live gdocs (GdocsContentSource.Gdocs) in // loadGdocFromGdocBase(). await upsertGdoc(knex, gdoc) - await updateDerivedGdocPostsComponents(knex, gdoc.id, gdoc.content.body) + return gdoc } @@ -616,7 +616,9 @@ export async function upsertGdoc( .onConflict("id") .merge() sql = query.toSQL() - return query + const indices = await query + await updateDerivedGdocPostsComponents(knex, gdoc.id, gdoc.content.body) + return indices } catch (e) { console.error(`Error occured in sql: ${sql}`, e) throw e diff --git a/db/model/Gdoc/extractGdocComponentInfo.ts b/db/model/Gdoc/extractGdocComponentInfo.ts index a55317890d..e23a657347 100644 --- a/db/model/Gdoc/extractGdocComponentInfo.ts +++ b/db/model/Gdoc/extractGdocComponentInfo.ts @@ -18,14 +18,13 @@ interface ComponentInfo { parentPath: string path: string } + +/** Specialized iteration function for the key-insights block */ function iterateKeyInsights( parent: T, parentPath: string, - prop: keyof T + _prop: keyof T ): ChildIterationInfo[] { - // Todo: there is a difference between props that are lists and single - // item props. the default should be the list and then we need to - // build up the .[0] part of the path const items: ChildIterationInfo[] = [] for (let i = 0; i < parent.insights.length; i++) { const slide = parent.insights[i] @@ -40,14 +39,12 @@ function iterateKeyInsights( return items } +/** Specialized iteration function for the table block */ function iterateTableProp( parent: T, parentPath: string, - prop: keyof T + _prop: keyof T ): ChildIterationInfo[] { - // Todo: there is a difference between props that are lists and single - // item props. the default should be the list and then we need to - // build up the .[0] part of the path const items: ChildIterationInfo[] = [] for (let i = 0; i < parent.rows.length; i++) { const row = parent.rows[i] @@ -64,14 +61,13 @@ function iterateTableProp( return items } +/** The default iteration function for the common case where a property on an + OwidEnrichedGdocBlock contains an array of OwidEnrichedGdocBlocks */ function iterateArrayProp( parent: T, parentPath: string, prop: keyof T ): ChildIterationInfo[] { - // Todo: there is a difference between props that are lists and single - // item props. the default should be the list and then we need to - // build up the .[0] part of the path return (parent[prop] as OwidEnrichedGdocBlock[]).map((child, index) => ({ child: child, parentPath: `${parentPath}`, @@ -79,6 +75,39 @@ function iterateArrayProp( })) } +/** Convert the spans in a gdoc component to plain text. + + The function does this by checking if the given value is an object + with a "spanType" property. If it is, it is assumed to be a span and + the text is extracted. If the value is an array that contains objects + with a "spanType" property, the value is turned into a string. For + other cases we recurse and copy the value as is. +*/ +function convertSpansToPlainText(obj: any): any { + if (Array.isArray(obj)) { + if ( + obj.length > 0 && + obj.every( + (item) => typeof item === "object" && item && "spanType" in item + ) + ) { + return spansToUnformattedPlainText(obj) + } + return obj.map((item) => convertSpansToPlainText(item)) + } + if (typeof obj === "object" && obj !== null) { + if (typeof obj === "object" && "spanType" in obj) { + return spansToUnformattedPlainText([obj]) + } + const result: Record = {} + for (const [key, value] of Object.entries(obj)) { + result[key] = convertSpansToPlainText(value) + } + return result + } + return obj +} + function handleComponent( component: T, childProperties: { @@ -95,32 +124,11 @@ function handleComponent( const props: (keyof T)[] = childProperties.map( (childProp) => childProp.prop ) - function convertSpansToPlainText(obj: any): any { - if (Array.isArray(obj)) { - if ( - obj.length > 0 && - obj.every( - (item) => - typeof item === "object" && item && "spanType" in item - ) - ) { - return spansToUnformattedPlainText(obj) - } - return obj.map((item) => convertSpansToPlainText(item)) - } - if (typeof obj === "object" && obj !== null) { - if (typeof obj === "object" && "spanType" in obj) { - return spansToUnformattedPlainText([obj]) - } - const result: Record = {} - for (const [key, value] of Object.entries(obj)) { - result[key] = convertSpansToPlainText(value) - } - return result - } - return obj - } + // This function is the workhorse of turning a gdoc component with children in + // the component tree into a flat list of components. + + // For the component itself we want to omit the children and convert the spans to plain text. const item: ComponentInfo = { content: convertSpansToPlainText( omit({ ...component }, props) @@ -130,7 +138,8 @@ function handleComponent( } const components = [] - + // Now we iterate over the children (using the provided iterator function since the structure of the children can vary) + // and recursively call this function on each child. for (const { prop, iterator } of childProperties) { try { const children = iterator(component, `${path}`, prop) @@ -155,144 +164,168 @@ export function enumerateGdocComponentsWithoutChildren( parentPath: string, path: string ): ComponentInfo[] { - return match(node) - .with( - { type: P.union("sticky-right", "sticky-left", "side-by-side") }, - (container) => + // Our gdoc components fall into three groups: + // 1. components that do not have Block children, e.g. "heading". + // These are handled at the bottom of this match block. + // 2. components that have direct block children, e.g. "sticky-right" + // These have one or more props that are arrays of blocks (OwidEnrichedGdocBlocks) + // These blocks use the standard `iterateArrayProp` enumeration function below + // 3. components that have children that are not blocks, e.g. "key-insights" + // These have one or more props that have a structure that is not simply OwidEnrichedGdocBlocks + // Key insights have "insight-slide" children for example that have a title and so on + // and then also have a "content" prop that is an array of blocks. + // These are handled by custom enumeration functions like `iterateKeyInsights` + return ( + match(node) + .with( + { + type: P.union( + "sticky-right", + "sticky-left", + "side-by-side" + ), + }, + (container) => + handleComponent( + container, + [ + { prop: "left", iterator: iterateArrayProp }, + { prop: "right", iterator: iterateArrayProp }, + ], + parentPath, + path + ) + ) + .with({ type: "gray-section" }, (graySection) => handleComponent( - container, - [ - { prop: "left", iterator: iterateArrayProp }, - { prop: "right", iterator: iterateArrayProp }, - ], + graySection, + [{ prop: "items", iterator: iterateArrayProp }], parentPath, path ) - ) - .with({ type: "gray-section" }, (graySection) => - handleComponent( - graySection, - [{ prop: "items", iterator: iterateArrayProp }], - parentPath, - path ) - ) - .with({ type: "key-insights" }, (keyInsights) => - handleComponent( - keyInsights, - [{ prop: "insights", iterator: iterateKeyInsights }], - parentPath, - path + .with({ type: "key-insights" }, (keyInsights) => + handleComponent( + keyInsights, + [{ prop: "insights", iterator: iterateKeyInsights }], + parentPath, + path + ) + ) + .with({ type: "callout" }, (callout) => + handleComponent( + callout, + [{ prop: "text", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "callout" }, (callout) => - handleComponent( - callout, - [{ prop: "text", iterator: iterateArrayProp }], - parentPath, - path + .with({ type: "list" }, (list) => + handleComponent( + list, + [{ prop: "items", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "list" }, (list) => - handleComponent( - list, - [{ prop: "items", iterator: iterateArrayProp }], - parentPath, - path + .with({ type: "numbered-list" }, (numberedList) => + handleComponent( + numberedList, + [{ prop: "items", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "numbered-list" }, (numberedList) => - handleComponent( - numberedList, - [{ prop: "items", iterator: iterateArrayProp }], - parentPath, - path + .with({ type: "expandable-paragraph" }, (expandableParagraph) => + handleComponent( + expandableParagraph, + [{ prop: "items", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "expandable-paragraph" }, (expandableParagraph) => - handleComponent( - expandableParagraph, - [{ prop: "items", iterator: iterateArrayProp }], - parentPath, - path + .with({ type: "align" }, (align) => + handleComponent( + align, + [{ prop: "content", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "align" }, (align) => - handleComponent( - align, - [{ prop: "content", iterator: iterateArrayProp }], - parentPath, - path + .with({ type: "table" }, (table) => + handleComponent( + table, + [{ prop: "rows", iterator: iterateTableProp }], + parentPath, + path + ) ) - ) - .with({ type: "table" }, (table) => - handleComponent( - table, - [{ prop: "rows", iterator: iterateTableProp }], - parentPath, - path + .with({ type: "blockquote" }, (blockquote) => + handleComponent( + blockquote, + [{ prop: "text", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "blockquote" }, (blockquote) => - handleComponent( - blockquote, - [{ prop: "text", iterator: iterateArrayProp }], - parentPath, - path + .with({ type: "key-indicator" }, (keyIndicator) => + handleComponent( + keyIndicator, + [{ prop: "text", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "key-indicator" }, (keyIndicator) => - handleComponent( - keyIndicator, - [{ prop: "text", iterator: iterateArrayProp }], - parentPath, - path + .with( + { type: "key-indicator-collection" }, + (keyIndicatorCollection) => + handleComponent( + keyIndicatorCollection, + [{ prop: "blocks", iterator: iterateArrayProp }], + parentPath, + path + ) ) - ) - .with({ type: "key-indicator-collection" }, (keyIndicatorCollection) => - handleComponent( - keyIndicatorCollection, - [{ prop: "blocks", iterator: iterateArrayProp }], - parentPath, - path + .with( + { + type: P.union( + "chart-story", + "chart", + "horizontal-rule", + "html", + "image", + "video", + "missing-data", + "prominent-link", + "pull-quote", + "recirc", + "research-and-writing", + "scroller", + "sdg-grid", + "sdg-toc", + "topic-page-intro", + "all-charts", + "entry-summary", + "explorer-tiles", + "pill-row", + "homepage-search", + "homepage-intro", + "latest-data-insights", + "socials", + "aside", + "text", + "heading", + "additional-charts", + "simple-text" + ), + }, + (c) => handleComponent(c, [], parentPath, path) ) - ) - .with( - { - type: P.union( - "chart-story", - "chart", - "horizontal-rule", - "html", - "image", - "video", - "missing-data", - "prominent-link", - "pull-quote", - "recirc", - "research-and-writing", - "scroller", - "sdg-grid", - "sdg-toc", - "topic-page-intro", - "all-charts", - "entry-summary", - "explorer-tiles", - "pill-row", - "homepage-search", - "homepage-intro", - "latest-data-insights", - "socials", - "aside", - "text", - "heading", - "additional-charts", - "simple-text" - ), - }, - (c) => handleComponent(c, [], parentPath, path) - ) - .exhaustive() + // Hey dev! If you get here because you add a new component, + // read the comment at the top of the match block for some + // guidance on how to handle new components. + .exhaustive() + ) } export function getGdocComponentsWithoutChildren(