From 410db2f5afbfa99ad50b612927a5e340e25c4688 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 28 Feb 2024 13:36:37 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A8=20migrate=20PostLink=20to=20knex?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- baker/postUpdatedHook.ts | 31 +++++----- db/model/PostLink.ts | 125 ++++++++++++++++++++++++++------------- db/syncPostsToGrapher.ts | 62 +++++++++++-------- 3 files changed, 139 insertions(+), 79 deletions(-) diff --git a/baker/postUpdatedHook.ts b/baker/postUpdatedHook.ts index bdcc5b070b4..4a997ea1e0a 100644 --- a/baker/postUpdatedHook.ts +++ b/baker/postUpdatedHook.ts @@ -18,13 +18,19 @@ import { getLinksToAddAndRemoveForPost, } from "../db/syncPostsToGrapher.js" import { postsTable, select } from "../db/model/Post.js" -import { PostLink } from "../db/model/PostLink.js" +import { + deleteManyPostLinks, + getPostLinksBySourceId, + insertManyPostLinks, +} from "../db/model/PostLink.js" +import { Knex } from "knex" const argv = parseArgs(process.argv.slice(2)) const zeroDateString = "0000-00-00 00:00:00" // Sync post from the wordpress database to OWID database const syncPostToGrapher = async ( + knex: Knex, postId: number ): Promise => { const rows = await wpdb.singleton.query( @@ -163,9 +169,10 @@ const syncPostToGrapher = async ( )[0] if (postRow) { - const existingLinksForPost = await PostLink.findBy({ - sourceId: wpPost.ID, - }) + const existingLinksForPost = await getPostLinksBySourceId( + knex, + wpPost.ID + ) const { linksToAdd, linksToDelete } = getLinksToAddAndRemoveForPost( postRow, @@ -177,19 +184,15 @@ const syncPostToGrapher = async ( // TODO: unify our DB access and then do everything in one transaction if (linksToAdd.length) { console.log("linksToAdd", linksToAdd.length) - await PostLink.createQueryBuilder() - .insert() - .into(PostLink) - .values(linksToAdd) - .execute() + await insertManyPostLinks(knex, linksToAdd) } if (linksToDelete.length) { 
console.log("linksToDelete", linksToDelete.length) - await PostLink.createQueryBuilder() - .where("id in (:ids)", { ids: linksToDelete.map((x) => x.id) }) - .delete() - .execute() + await deleteManyPostLinks( + knex, + linksToDelete.map((x) => x.id) + ) } } return newPost ? newPost.slug : undefined @@ -203,7 +206,7 @@ const main = async ( ) => { console.log(email, name, postId) try { - const slug = await syncPostToGrapher(postId) + const slug = await syncPostToGrapher(db.knexInstance(), postId) if (BAKE_ON_CHANGE) await new DeployQueueServer().enqueueChange({ diff --git a/db/model/PostLink.ts b/db/model/PostLink.ts index 35db9cb5d3c..5f30e6951e2 100644 --- a/db/model/PostLink.ts +++ b/db/model/PostLink.ts @@ -1,47 +1,92 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity } from "typeorm" import { formatUrls } from "../../site/formatting.js" -import { Url } from "@ourworldindata/utils" +import { + DbInsertPostLink, + DbPlainPostLink, + PostsLinksTableName, + Url, +} from "@ourworldindata/utils" import { getLinkType, getUrlTarget } from "@ourworldindata/components" +import { Knex } from "knex" +export function postLinkCreateFromUrl({ + url, + sourceId, + text = "", + componentType = "", +}: { + url: string + sourceId: number + text?: string + componentType?: string +}): Omit { + const formattedUrl = formatUrls(url) + const urlObject = Url.fromURL(formattedUrl) + const linkType = getLinkType(formattedUrl) + const target = getUrlTarget(formattedUrl) + const queryString = urlObject.queryStr + const hash = urlObject.hash + return { + target, + linkType, + queryString, + hash, + sourceId, + text, + componentType, + } +} -@Entity("posts_links") -export class PostLink extends BaseEntity { - @PrimaryGeneratedColumn() id!: number - // TODO: posts is not a TypeORM but a Knex class so we can't use a TypeORM relationship here yet +export async function getPostLinkById( + knex: Knex, + id: number +): Promise { + return knex(PostsLinksTableName).where({ id }).first() 
+} - @Column({ type: "int", nullable: false }) sourceId!: number +export async function getAllPostLinks( + knex: Knex +): Promise { + return knex(PostsLinksTableName) +} - @Column() linkType!: "gdoc" | "url" | "grapher" | "explorer" - @Column() target!: string - @Column() queryString!: string - @Column() hash!: string - @Column() componentType!: string - @Column() text!: string +export async function getPostLinksBySourceId( + knex: Knex, + sourceId: number +): Promise { + return knex(PostsLinksTableName).where({ sourceId }) +} - static createFromUrl({ - url, - sourceId, - text = "", - componentType = "", - }: { - url: string - sourceId: number - text?: string - componentType?: string - }): PostLink { - const formattedUrl = formatUrls(url) - const urlObject = Url.fromURL(formattedUrl) - const linkType = getLinkType(formattedUrl) - const target = getUrlTarget(formattedUrl) - const queryString = urlObject.queryStr - const hash = urlObject.hash - return PostLink.create({ - target, - linkType, - queryString, - hash, - sourceId, - text, - componentType, - }) - } +export async function insertPostLink( + knex: Knex, + postLink: DbInsertPostLink +): Promise<{ id: number }> { + return knex(PostsLinksTableName).returning("id").insert(postLink) +} + +export async function insertManyPostLinks( + knex: Knex, + postLinks: DbInsertPostLink[] +): Promise { + return knex.batchInsert(PostsLinksTableName, postLinks) +} + +export async function updatePostLink( + knex: Knex, + id: number, + postLink: DbInsertPostLink +): Promise { + return knex(PostsLinksTableName).where({ id }).update(postLink) +} + +export async function deletePostLink( + knex: Knex, + id: number +): Promise { + return knex(PostsLinksTableName).where({ id }).delete() +} + +export async function deleteManyPostLinks( + knex: Knex, + ids: number[] +): Promise { + return knex(PostsLinksTableName).whereIn("id", ids).delete() } diff --git a/db/syncPostsToGrapher.ts b/db/syncPostsToGrapher.ts index 9317b76905c..936f8d3f3c8 
100644 --- a/db/syncPostsToGrapher.ts +++ b/db/syncPostsToGrapher.ts @@ -11,11 +11,19 @@ import { DbEnrichedPost, sortBy, serializePostRow, + DbPlainPostLink, + DbInsertPostLink, } from "@ourworldindata/utils" import { postsTable, select } from "./model/Post.js" -import { PostLink } from "./model/PostLink.js" +import { + deleteManyPostLinks, + getAllPostLinks, + insertManyPostLinks, + postLinkCreateFromUrl, +} from "./model/PostLink.js" import { renderTablePress } from "../site/Tablepress.js" import pMap from "p-map" +import { Knex } from "knex" const zeroDateString = "0000-00-00 00:00:00" @@ -157,15 +165,18 @@ export async function buildTablePressResolver(): Promise { replaceTablePressShortcodes(content, replacerFunction) } -export const postLinkCompareStringGenerator = (item: PostLink): string => +export const postLinkCompareStringGenerator = (item: DbPlainPostLink): string => `${item.linkType} - ${item.target} - ${item.hash} - ${item.queryString}` export function getLinksToAddAndRemoveForPost( post: DbEnrichedPost, - existingLinksForPost: PostLink[], + existingLinksForPost: DbPlainPostLink[], content: string, postId: number -): { linksToAdd: PostLink[]; linksToDelete: PostLink[] } { +): { + linksToAdd: Omit[] + linksToDelete: DbPlainPostLink[] +} { const linksInDb = groupBy( existingLinksForPost, postLinkCompareStringGenerator @@ -206,15 +217,15 @@ export function getLinksToAddAndRemoveForPost( ) const linksInDocument = keyBy( [ - ...allHrefs.map((link) => PostLink.createFromUrl(link)), - ...allSrcs.map((link) => PostLink.createFromUrl(link)), - ...allProminentLinks.map((link) => PostLink.createFromUrl(link)), + ...allHrefs.map((link) => postLinkCreateFromUrl(link)), + ...allSrcs.map((link) => postLinkCreateFromUrl(link)), + ...allProminentLinks.map((link) => postLinkCreateFromUrl(link)), ], postLinkCompareStringGenerator ) - const linksToAdd: PostLink[] = [] - const linksToDelete: PostLink[] = [] + const linksToAdd: Omit[] = [] + const linksToDelete: 
DbPlainPostLink[] = [] // This is doing a set difference, but we want to do the set operation on a subset // of fields (the ones we stringify into the compare key) while retaining the full @@ -222,14 +233,15 @@ export function getLinksToAddAndRemoveForPost( for (const [linkInDocCompareKey, linkInDoc] of Object.entries( linksInDocument )) - if (!(linkInDocCompareKey in linksInDb)) linksToAdd.push(linkInDoc) + if (!(linkInDocCompareKey in linksInDb)) + linksToAdd.push(linkInDoc as Omit) for (const [linkInDbCompareKey, linkInDb] of Object.entries(linksInDb)) if (!(linkInDbCompareKey in linksInDocument)) linksToDelete.push(...linkInDb) return { linksToAdd, linksToDelete } } -const syncPostsToGrapher = async (): Promise => { +const syncPostsToGrapher = async (knex: Knex): Promise => { const dereferenceReusableBlocksFn = await buildReusableBlocksResolver() const dereferenceTablePressFn = await buildTablePressResolver() @@ -372,11 +384,14 @@ const syncPostsToGrapher = async (): Promise => { }, { concurrency: 20 } )) as DbEnrichedPost[] - const postLinks = await PostLink.find() - const postLinksById = groupBy(postLinks, (link: PostLink) => link.sourceId) + const postLinks = await getAllPostLinks(knex) + const postLinksById = groupBy( + postLinks, + (link: DbPlainPostLink) => link.sourceId + ) - const linksToAdd: PostLink[] = [] - const linksToDelete: PostLink[] = [] + const linksToAdd: DbInsertPostLink[] = [] + const linksToDelete: DbPlainPostLink[] = [] for (const post of rows) { const existingLinksForPost = postLinksById[post.ID] @@ -409,26 +424,23 @@ const syncPostsToGrapher = async (): Promise => { // TODO: unify our DB access and then do everything in one transaction if (linksToAdd.length) { console.log("linksToAdd", linksToAdd.length) - await PostLink.createQueryBuilder() - .insert() - .into(PostLink) - .values(linksToAdd) - .execute() + await insertManyPostLinks(knex, linksToAdd) } if (linksToDelete.length) { console.log("linksToDelete", linksToDelete.length) - await 
PostLink.createQueryBuilder() - .where("id in (:ids)", { ids: linksToDelete.map((x) => x.id) }) - .delete() - .execute() + await deleteManyPostLinks( + knex, + linksToDelete.map((link) => link.id) + ) } } const main = async (): Promise => { try { await db.getConnection() - await syncPostsToGrapher() + const knex = db.knexInstance() + await syncPostsToGrapher(knex) } finally { await wpdb.singleton.end() await db.closeTypeOrmAndKnexConnections()