From 586b7ea113440b7dc8113dada8ea4ca1ee485452 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Tue, 12 Nov 2024 20:12:27 +0000 Subject: [PATCH 01/40] =?UTF-8?q?=F0=9F=8E=89=20add=20cloudflareId=20to=20?= =?UTF-8?q?images=20table?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/migration/1731360326761-CloudflareImages.ts | 15 +++++++++++++++ .../@ourworldindata/types/src/dbTypes/Images.ts | 1 + 2 files changed, 16 insertions(+) create mode 100644 db/migration/1731360326761-CloudflareImages.ts diff --git a/db/migration/1731360326761-CloudflareImages.ts b/db/migration/1731360326761-CloudflareImages.ts new file mode 100644 index 00000000000..001a6a51d55 --- /dev/null +++ b/db/migration/1731360326761-CloudflareImages.ts @@ -0,0 +1,15 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class CloudflareImages1731360326761 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(`-- sql + ALTER TABLE images ADD COLUMN cloudflareId VARCHAR(255) NULL + `) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`-- sql + ALTER TABLE images DROP COLUMN cloudflareId + `) + } +} diff --git a/packages/@ourworldindata/types/src/dbTypes/Images.ts b/packages/@ourworldindata/types/src/dbTypes/Images.ts index 1efddb86d2a..430931cf3ce 100644 --- a/packages/@ourworldindata/types/src/dbTypes/Images.ts +++ b/packages/@ourworldindata/types/src/dbTypes/Images.ts @@ -7,6 +7,7 @@ export interface DbInsertImage { originalWidth?: number | null originalHeight?: number | null updatedAt?: string | null // MySQL Date objects round to the nearest second, whereas Google includes milliseconds so we store as an epoch of type bigint to avoid any conversion issues + cloudflareId?: string | null } export type DbRawImage = Required export type DbEnrichedImage = Omit & { From 080c2af513947c2eb64dfb85157d487c6371185c Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Tue, 12 Nov 2024 17:25:34 -0500 Subject: [PATCH 02/40] =?UTF-8?q?=F0=9F=8E=89=20add=20cloudflare=20images?= =?UTF-8?q?=20sync=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 4 + .../cloudflareImagesSync.ts | 458 ++++++++++++++++++ devTools/cloudflareImagesSync/tsconfig.json | 15 + package.json | 5 +- settings/serverSettings.ts | 6 + yarn.lock | 31 ++ 6 files changed, 518 insertions(+), 1 deletion(-) create mode 100644 devTools/cloudflareImagesSync/cloudflareImagesSync.ts create mode 100644 devTools/cloudflareImagesSync/tsconfig.json diff --git a/Makefile b/Makefile index 8c7f50ed801..108e206e8ad 100644 --- a/Makefile +++ b/Makefile @@ -153,6 +153,10 @@ sync-images: sync-images.preflight-check @echo '==> Syncing images to R2' ./devTools/docker/sync-s3-images.sh +sync-cloudflare-images: + @echo '==> Syncing images to Cloudflare' + @yarn syncCloudflareImages + refresh.full: refresh refresh.pageviews sync-images @echo '==> Full refresh completed' @make bake-images diff --git a/devTools/cloudflareImagesSync/cloudflareImagesSync.ts b/devTools/cloudflareImagesSync/cloudflareImagesSync.ts new file mode 100644 index 00000000000..94d1e4090c5 --- /dev/null +++ b/devTools/cloudflareImagesSync/cloudflareImagesSync.ts @@ -0,0 +1,458 @@ +const is = require("image-size") +import * as readline from "readline" +import pMap from "p-map" +import path from "path" +import fs from "fs/promises" +import { DbEnrichedImage } from "@ourworldindata/types" +import * as db from "../../db/db.js" +import { + CLOUDFLARE_IMAGES_ACCOUNT_ID, + CLOUDFLARE_IMAGES_API_KEY, + IMAGE_HOSTING_R2_CDN_URL, +} from "../../settings/serverSettings.js" +import { excludeNullish, keyBy } from "@ourworldindata/utils" + +type CloudflareImageDirectory = Record + +enum InvalidImageReason { + TooLarge = "TooLarge", + InvalidFormat = "InvalidFormat", + InvalidDimensions = "InvalidDimensions", + TooManyMegapixels = "TooManyMegapixels", + InvalidMetadata = "InvalidMetadata", + UnknownError = "UnknownError", +} + +type ImageValidationObject = { + filename: string + reason: InvalidImageReason + extra?: any +} + +function stringifyImageMetadata(image: DbEnrichedImage) { + return JSON.stringify({ + filename: image.filename, + }) +} + +/** + * Make sure that each database cloudflareId corresponds to a valid image in the Cloudflare Images directory + */ +async function validateDirectory( + trx: db.KnexReadWriteTransaction, + directory: CloudflareImageDirectory +): Promise<{ isValid: boolean; invalidImages: string[] }> { + const imagesWithIds = await db.knexRaw<{ + filename: string + cloudflareId: string + }>( + trx, + `-- sql + SELECT filename, cloudflareId FROM images WHERE cloudflareId IS NOT NULL` + ) + const imagesSharingCloudflareIds = await db + .knexRaw<{ + cloudflareId: string + count: number + filenames: string + }>( + trx, + `-- sql + SELECT + cloudflareId, + COUNT(*) as count, + JSON_ARRAYAGG( + filename + ) as filenames + FROM images + WHERE cloudflareId IS NOT NULL + GROUP BY cloudflareId + HAVING count > 1` + ) + .then((results) => + results.map((result) => ({ + cloudflareId: result.cloudflareId, + count: result.count, + filenames: JSON.parse(result.filenames) as string[], + })) + ) + .then((results) => keyBy(results, "cloudflareId")) + + const invalidImages: string[] = [] + for (const image of imagesWithIds) { + if (!directory[image.filename]) { + // If an identical image was uploaded with multiple filenames, subsequent copies will use the same cloudflareId as the first + // so let's check if this is a case of that + const imagesSharingCloudflareId = + imagesSharingCloudflareIds[image.cloudflareId] + if (imagesSharingCloudflareId) { + const filenames = imagesSharingCloudflareId.filenames + if (filenames.includes(image.filename)) { + console.log( + `Image with filename "${image.filename}" has a cloudflareId that is shared with other images.` + ) + continue + } + } + console.log( + `Image with filename "${image.filename}" has a cloudflareId that is not in the Cloudflare Images directory.` + ) + invalidImages.push(image.filename) + } + } + return { + isValid: invalidImages.length === 0, + invalidImages, + } +} + +async function purgeRecords(trx: db.KnexReadWriteTransaction) { + await new Promise((resolve) => { + const readlineInterface = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + readlineInterface.question( + "Are you sure you want to delete ALL images from Cloudflare Images? (y/n) ", + (answer) => { + if (answer.toLowerCase() === "y") { + resolve() + } else { + console.log("Aborting.") + process.exit(0) + } + readlineInterface.close() + } + ) + }) + + const directory = await getCloudflareImageDirectory() + console.log("Deleting all images from Cloudflare Images...") + await pMap( + Object.values(directory), + async (image) => { + console.log("Deleting image:", image.filename) + try { + await fetch( + `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1/${image.id}`, + { + method: "DELETE", + headers: { + Authorization: `Bearer ${CLOUDFLARE_IMAGES_API_KEY}`, + }, + } + ) + } catch (e) { + console.error(e) + } + }, + { concurrency: 10 } + ) + console.log("Finished") + + await new Promise((resolve) => { + const readlineInterface = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + readlineInterface.question( + "Would you also like to set all cloudflareIds to NULL in the DB? (y/n) ", + (answer) => { + if (answer.toLowerCase() === "y") { + resolve() + } else { + console.log("Aborting.") + process.exit(0) + } + readlineInterface.close() + } + ) + }) + console.log("May God have mercy on your soul.") + + await db.knexRaw( + trx, + `-- sql + UPDATE images + SET cloudflareId = NULL` + ) + console.log("All cloudflareIds set to NULL in the DB.") +} + +/** + * Cloudflare has a width/height of 12000px, metadata of 1024B, 100megapixels, and a 10MB filesize limit + */ +function validateImage( + imageBuffer: Buffer, + metadata: string +): InvalidImageReason | null { + const imageSize = is(imageBuffer) + if (!imageSize) { + return InvalidImageReason.InvalidFormat + } + + if (imageSize.width > 12000 || imageSize.height > 12000) { + return InvalidImageReason.InvalidDimensions + } + + if (imageSize.width * imageSize.height > 100 * 1000000) { + return InvalidImageReason.TooManyMegapixels + } + + if (imageBuffer.byteLength > 10 * 1024 * 1024) { + return InvalidImageReason.TooLarge + } + + if (Buffer.byteLength(metadata, "utf8") > 1024) { + return InvalidImageReason.InvalidMetadata + } + + return null +} + +async function checkIfAlreadyUploadedToCloudflareImages( + filename: string, + cloudflareImagesDirectory: CloudflareImageDirectory +): Promise { + if (cloudflareImagesDirectory[filename]) { + console.log( + `Image with filename "${filename}" has already uploaded to Cloudflare Images.` + ) + return true + } + return false +} + +async function checkIfAlreadyTrackedInDB( + trx: db.KnexReadWriteTransaction, + filename: string +) { + console.log("Checking to see if the DB has the Cloudflare ID...") + const cloudflareId = await trx + .raw<{ cloudflareId: string }[][]>( + `-- sql + SELECT cloudflareId FROM images WHERE filename = ? + `, + [filename] + ) + .then((res) => res[0][0]?.cloudflareId) + if (!cloudflareId) { + console.log("No Cloudflare ID found in the DB.") + return false + } else { + console.log(`Cloudflare ID "${cloudflareId}" exists in the DB.`) + return true + } +} + +async function updateDbWithCloudflareId( + trx: db.KnexReadWriteTransaction, + filename: string, + cloudflareId: string +) { + console.log("Updating the DB with the Cloudflare ID...") + await trx.raw( + `-- sql + UPDATE images + SET cloudflareId = ? + WHERE filename = ?`, + [cloudflareId, filename] + ) +} + +async function uploadImageToCloudflareImages( + trx: db.KnexReadWriteTransaction, + image: DbEnrichedImage, + invalidImages: ImageValidationObject[], + cloudflareImagesDirectory: CloudflareImageDirectory +) { + const filename = image.filename + + /** + * If the image is already tracked in the DB, we don't need to do anything. + * If the image is already uploaded to Cloudflare Images, we check if we need to update the DB with the cloudflareId. + * It's possible the image has already been uploaded but is saved under a different filename, + * in which case we go through the normal process of uploading the image, + * which is a no-op for Cloudflare, but will give us the right ID to update the DB with. + */ + const alreadyTracked = await checkIfAlreadyTrackedInDB(trx, filename) + const alreadyUploaded = await checkIfAlreadyUploadedToCloudflareImages( + filename, + cloudflareImagesDirectory + ) + if (alreadyTracked) { + return + } + if (alreadyUploaded) { + const cloudflareId = cloudflareImagesDirectory[filename].id + await updateDbWithCloudflareId(trx, filename, cloudflareId) + return + } + + const imageUrl = `${IMAGE_HOSTING_R2_CDN_URL}/production/${filename}` + console.log("Downloading image:", filename) + const imageBuffer = await fetch(imageUrl).then((res) => res.arrayBuffer()) + const metadata = stringifyImageMetadata(image) + const isInvalid = validateImage(Buffer.from(imageBuffer), metadata) + if (isInvalid) { + console.log(`Image "${filename}" is invalid: ${isInvalid}`) + invalidImages.push({ + filename, + reason: isInvalid, + }) + return + } + + const formData = new FormData() + formData.append("url", imageUrl) + formData.append("metadata", metadata) + formData.append("requireSignedURLs", "false") + + console.log("Uploading image to Cloudflare Images...") + const uploadResults = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1`, + { + method: "POST", + headers: { + Authorization: `Bearer ${CLOUDFLARE_IMAGES_API_KEY}`, + }, + body: formData, + } + ).then((res) => res.json()) + + if (!uploadResults || uploadResults.errors.length) { + invalidImages.push({ + filename, + reason: InvalidImageReason.UnknownError, + extra: uploadResults.errors, + }) + return + } + + await trx.raw( + `-- sql + UPDATE images + SET cloudflareId = ? + WHERE googleId = ?`, + [uploadResults.result.id, image.googleId] + ) +} + +async function getCloudflareImageDirectory() { + console.log("Fetching Cloudflare Images directory...") + const directory = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1?per_page=2000`, + { + headers: { + Authorization: `Bearer ${CLOUDFLARE_IMAGES_API_KEY}`, + }, + } + ) + .then((res) => res.json()) + .then((res) => { + console.log( + `Cloudflare Images directory fetched. ${res.result.images.length} images found.` + ) + return res.result.images + }) + .then( + (images) => + keyBy(images, (image) => + decodeURIComponent(image.filename) + ) as CloudflareImageDirectory + ) + + return directory +} + +async function fetchImagesFromDatabase(trx: db.KnexReadWriteTransaction) { + console.log("Fetching images from the database...") + return await trx + .raw( + `-- sql + SELECT * FROM images WHERE id IN ( + SELECT DISTINCT imageId FROM posts_gdocs_x_images + )` + ) + .then((res) => res.flat()) + .then(excludeNullish) + .then((images) => images.filter((image) => image && image.filename)) + .then((images) => + images.sort((a, b) => a.filename.localeCompare(b.filename)) + ) +} + +async function uploadImagesToCloudflareImages( + trx: db.KnexReadWriteTransaction, + cloudflareImagesDirectory: CloudflareImageDirectory +) { + const invalidImages: ImageValidationObject[] = [] + + const images = await fetchImagesFromDatabase(trx) + console.log(`${images.length} images fetched.`) + + await pMap( + images, + async (image) => { + console.log(`Processing image: ${image.filename}`) + try { + await uploadImageToCloudflareImages( + trx, + image, + invalidImages, + cloudflareImagesDirectory + ) + } catch (e) { + console.error(e) + invalidImages.push({ + filename: image.filename, + reason: InvalidImageReason.UnknownError, + extra: e, + }) + } + }, + { concurrency: 10 } + ) + + console.log("Finished!") + console.log( + `There were ${invalidImages.length} invalid images. See invalidImages.json for details.` + ) + + await fs.writeFile( + path.join(__dirname, "invalidImages.json"), + JSON.stringify(invalidImages, null, 2) + ) +} + +async function main() { + if (!CLOUDFLARE_IMAGES_ACCOUNT_ID || !CLOUDFLARE_IMAGES_API_KEY) { + console.error( + `Cloudflare Images credentials not set. +You need to set "CLOUDFLARE_IMAGES_ACCOUNT_ID" and "CLOUDFLARE_IMAGES_API_KEY" in your .env` + ) + return + } + + await db.knexReadWriteTransaction(async (trx) => { + // await purgeRecords(trx) + + const directory = await getCloudflareImageDirectory() + const { isValid, invalidImages } = await validateDirectory( + trx, + directory + ) + if (isValid) { + await uploadImagesToCloudflareImages(trx, directory) + } else { + console.error( + `The DB has images that do not exist in the Cloudflare Images directory. You should check those out first` + ) + console.error(invalidImages) + } + }) +} + +main().then(() => process.exit(0)) diff --git a/devTools/cloudflareImagesSync/tsconfig.json b/devTools/cloudflareImagesSync/tsconfig.json new file mode 100644 index 00000000000..208a03820db --- /dev/null +++ b/devTools/cloudflareImagesSync/tsconfig.json @@ -0,0 +1,15 @@ +{ + "extends": "../tsconfigs/tsconfig.base.json", + "compilerOptions": { + "outDir": "../../itsJustJavascript/devTools/cloudflareImagesSync", + "rootDir": "." + }, + "references": [ + { + "path": "../../db" + }, + { + "path": "../../settings" + } + ] +} diff --git a/package.json b/package.json index 77a81064274..76707ebe367 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,8 @@ "testJest": "lerna run buildTests && jest", "testSiteNavigation": "tsx --tsconfig tsconfig.tsx.json devTools/navigationTest/navigationTest.ts", "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json", - "syncGraphersToR2": "tsx --tsconfig tsconfig.tsx.json devTools/syncGraphersToR2/syncGraphersToR2.ts" + "syncGraphersToR2": "tsx --tsconfig tsconfig.tsx.json devTools/syncGraphersToR2/syncGraphersToR2.ts", + "syncCloudflareImages": "tsx --tsconfig tsconfig.tsx.json devTools/cloudflareImagesSync/cloudflareImagesSync.ts" }, "dependencies": { "@algolia/autocomplete-js": "^1.17.2", @@ -189,6 +190,7 @@ "@types/fs-extra": "^11.0.1", "@types/geojson": "^7946.0.10", "@types/html-to-text": "^9.0.4", + "@types/image-size": "^0.8.0", "@types/indefinite": "^2.3.2", "@types/ini": "^4", "@types/js-cookie": "^3.0.2", @@ -232,6 +234,7 @@ "flag-icons": "^7.2.3", "http-server": "^14.1.1", "husky": "^9.0.11", + "image-size": "^1.1.1", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", "lerna": "^8.1.6", diff --git a/settings/serverSettings.ts b/settings/serverSettings.ts index 2530259bba4..51959210ada 100644 --- a/settings/serverSettings.ts +++ b/settings/serverSettings.ts @@ -169,6 +169,12 @@ export const R2_SECRET_ACCESS_KEY: string = export const R2_REGION: string = serverSettings.R2_REGION || rcloneConfig["owid-r2"]?.region || "auto" +export const CLOUDFLARE_IMAGES_ACCOUNT_ID: string = + serverSettings.CLOUDFLARE_IMAGES_ACCOUNT_ID || "" + +export const CLOUDFLARE_IMAGES_API_KEY: string = + serverSettings.CLOUDFLARE_IMAGES_API_KEY || "" + export const GRAPHER_CONFIG_R2_BUCKET: string | undefined = serverSettings.GRAPHER_CONFIG_R2_BUCKET export const GRAPHER_CONFIG_R2_BUCKET_PATH: string | undefined = diff --git a/yarn.lock b/yarn.lock index 14308cf0d1d..4bf31fa42a1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5270,6 +5270,15 @@ __metadata: languageName: node linkType: hard +"@types/image-size@npm:^0.8.0": + version: 0.8.0 + resolution: "@types/image-size@npm:0.8.0" + dependencies: + image-size: "npm:*" + checksum: 10/9530adc7515609f801d37d4db80f883855d7e5ba6c593f3705b6d54550438a97822937d416bc27e09237b2c610e692cce3cf59ff3105d9c4bb8c91c13ba269b6 + languageName: node + linkType: hard + "@types/indefinite@npm:^2.3.2": version: 2.3.2 resolution: "@types/indefinite@npm:2.3.2" @@ -11073,6 +11082,7 @@ __metadata: "@types/fs-extra": "npm:^11.0.1" "@types/geojson": "npm:^7946.0.10" "@types/html-to-text": "npm:^9.0.4" + "@types/image-size": "npm:^0.8.0" "@types/indefinite": "npm:^2.3.2" "@types/ini": "npm:^4" "@types/js-cookie": "npm:^3.0.2" @@ -11147,6 +11157,7 @@ __metadata: html-to-text: "npm:^9.0.5" http-server: "npm:^14.1.1" husky: "npm:^9.0.11" + image-size: "npm:^1.1.1" indefinite: "npm:^2.4.3" ini: "npm:^4.1.2" instantsearch.js: "npm:^4.72.1" @@ -11728,6 +11739,17 @@ __metadata: languageName: node linkType: hard +"image-size@npm:*, image-size@npm:^1.1.1": + version: 1.1.1 + resolution: "image-size@npm:1.1.1" + dependencies: + queue: "npm:6.0.2" + bin: + image-size: bin/image-size.js + checksum: 10/f28966dd3f6d4feccc4028400bb7e8047c28b073ab0aa90c7c53039288139dd416c6bc254a976d4bf61113d4bc84871786804113099701cbfe9ccf377effdb54 + languageName: node + linkType: hard + "immutable@npm:^4.0.0, immutable@npm:^4.3.6": version: 4.3.6 resolution: "immutable@npm:4.3.6" @@ -16311,6 +16333,15 @@ __metadata: languageName: node linkType: hard +"queue@npm:6.0.2": + version: 6.0.2 + resolution: "queue@npm:6.0.2" + dependencies: + inherits: "npm:~2.0.3" + checksum: 10/3437954ef1442c86ff01a0fbe3dc6222838823b1ca97f37eff651bc20b868c0c2904424ef2c0d44cba46055f54b578f92866e573125dc9a5e8823d751e4d1585 + languageName: node + linkType: hard + "quick-lru@npm:^4.0.1": version: 4.0.1 resolution: "quick-lru@npm:4.0.1" From 15415605c651e11481a1b4df6897792989b6f586 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Tue, 12 Nov 2024 17:49:01 -0500 Subject: [PATCH 03/40] =?UTF-8?q?=E2=9C=A8=20use=20the=20v2=20list=20image?= =?UTF-8?q?s=20API=20in=20CFI=20sync=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/cloudflareImagesSync/cloudflareImagesSync.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devTools/cloudflareImagesSync/cloudflareImagesSync.ts b/devTools/cloudflareImagesSync/cloudflareImagesSync.ts index 94d1e4090c5..8d78862cfe5 100644 --- a/devTools/cloudflareImagesSync/cloudflareImagesSync.ts +++ b/devTools/cloudflareImagesSync/cloudflareImagesSync.ts @@ -135,7 +135,7 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) { console.log("Deleting image:", image.filename) try { await fetch( - `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1/${image.id}`, + `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v2/${image.id}`, { method: "DELETE", headers: { From 3fae4c3c385ceec5d15431ff6f7bafd2a6e4bf83 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Wed, 13 Nov 2024 16:16:00 -0500 Subject: [PATCH 04/40] =?UTF-8?q?=F0=9F=90=9B=20fix=20sync=20script=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cloudflareImagesSync.ts | 161 +++++++++--------- package.json | 2 - yarn.lock | 31 ---- 3 files changed, 80 insertions(+), 114 deletions(-) diff --git a/devTools/cloudflareImagesSync/cloudflareImagesSync.ts b/devTools/cloudflareImagesSync/cloudflareImagesSync.ts index 8d78862cfe5..b00932aae9a 100644 --- a/devTools/cloudflareImagesSync/cloudflareImagesSync.ts +++ b/devTools/cloudflareImagesSync/cloudflareImagesSync.ts @@ -1,4 +1,3 @@ -const is = require("image-size") import * as readline from "readline" import pMap from "p-map" import path from "path" @@ -15,7 +14,6 @@ import { excludeNullish, keyBy } from "@ourworldindata/utils" type CloudflareImageDirectory = Record enum InvalidImageReason { - TooLarge = "TooLarge", InvalidFormat = "InvalidFormat", InvalidDimensions = "InvalidDimensions", TooManyMegapixels = "TooManyMegapixels", @@ -29,6 +27,34 @@ type ImageValidationObject = { extra?: any } +type CloudflareAPIResponseInfo = { + code: number + message: string +} + +type CloudflareAPIDeleteResponse = { + result: any + errors: CloudflareAPIResponseInfo[] + messages: CloudflareAPIResponseInfo[] + success: boolean +} + +type CloudflareAPIUploadResponse = { + errors: CloudflareAPIResponseInfo[] + messages: CloudflareAPIResponseInfo[] + result: { + id?: string + filename?: string + meta?: { + key: string + } + requireSignedURLs?: boolean + uploaded?: string + variants?: string[] + } + success: boolean +} + function stringifyImageMetadata(image: DbEnrichedImage) { return JSON.stringify({ filename: image.filename, @@ -50,53 +76,10 @@ async function validateDirectory( `-- sql SELECT filename, cloudflareId FROM images WHERE cloudflareId IS NOT NULL` ) - const imagesSharingCloudflareIds = await db - .knexRaw<{ - cloudflareId: string - count: number - filenames: string - }>( - trx, - `-- sql - SELECT - cloudflareId, - COUNT(*) as count, - JSON_ARRAYAGG( - filename - ) as filenames - FROM images - WHERE cloudflareId IS NOT NULL - GROUP BY cloudflareId - HAVING count > 1` - ) - .then((results) => - results.map((result) => ({ - cloudflareId: result.cloudflareId, - count: result.count, - filenames: JSON.parse(result.filenames) as string[], - })) - ) - .then((results) => keyBy(results, "cloudflareId")) const invalidImages: string[] = [] for (const image of imagesWithIds) { if (!directory[image.filename]) { - // If an identical image was uploaded with multiple filenames, subsequent copies will use the same cloudflareId as the first - // so let's check if this is a case of that - const imagesSharingCloudflareId = - imagesSharingCloudflareIds[image.cloudflareId] - if (imagesSharingCloudflareId) { - const filenames = imagesSharingCloudflareId.filenames - if (filenames.includes(image.filename)) { - console.log( - `Image with filename "${image.filename}" has a cloudflareId that is shared with other images.` - ) - continue - } - } - console.log( - `Image with filename "${image.filename}" has a cloudflareId that is not in the Cloudflare Images directory.` - ) invalidImages.push(image.filename) } } @@ -117,6 +100,7 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) { "Are you sure you want to delete ALL images from Cloudflare Images? (y/n) ", (answer) => { if (answer.toLowerCase() === "y") { + console.log("May God have mercy on your soul.") resolve() } else { console.log("Aborting.") @@ -135,7 +119,7 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) { console.log("Deleting image:", image.filename) try { await fetch( - `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v2/${image.id}`, + `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1/${encodeURIComponent(image.id)}`, { method: "DELETE", headers: { @@ -143,11 +127,23 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) { }, } ) + .then((res) => res.json()) + .then((res: CloudflareAPIDeleteResponse) => { + if (res.success) { + console.log("Image deleted:", image.filename) + } else { + console.error( + "Error deleting image:", + image.filename, + res.errors + ) + } + }) } catch (e) { console.error(e) } }, - { concurrency: 10 } + { concurrency: 6 } ) console.log("Finished") @@ -170,7 +166,6 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) { } ) }) - console.log("May God have mercy on your soul.") await db.knexRaw( trx, @@ -185,24 +180,23 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) { * Cloudflare has a width/height of 12000px, metadata of 1024B, 100megapixels, and a 10MB filesize limit */ function validateImage( - imageBuffer: Buffer, + image: DbEnrichedImage, metadata: string ): InvalidImageReason | null { - const imageSize = is(imageBuffer) - if (!imageSize) { + if (!image.filename.match(/\.(png|jpg|jpeg|gif|webp)$/)) { return InvalidImageReason.InvalidFormat } - if (imageSize.width > 12000 || imageSize.height > 12000) { - return InvalidImageReason.InvalidDimensions + if (!image.originalWidth || !image.originalHeight) { + return InvalidImageReason.InvalidFormat } - if (imageSize.width * imageSize.height > 100 * 1000000) { - return InvalidImageReason.TooManyMegapixels + if (image.originalWidth > 12000 || image.originalHeight > 12000) { + return InvalidImageReason.InvalidDimensions } - if (imageBuffer.byteLength > 10 * 1024 * 1024) { - return InvalidImageReason.TooLarge + if (image.originalWidth * image.originalHeight > 100 * 1000000) { + return InvalidImageReason.TooManyMegapixels } if (Buffer.byteLength(metadata, "utf8") > 1024) { @@ -217,9 +211,7 @@ async function checkIfAlreadyUploadedToCloudflareImages( cloudflareImagesDirectory: CloudflareImageDirectory ): Promise { if (cloudflareImagesDirectory[filename]) { - console.log( - `Image with filename "${filename}" has already uploaded to Cloudflare Images.` - ) + console.log("Already in Cloudflare Images:", filename) return true } return false @@ -229,20 +221,18 @@ async function checkIfAlreadyTrackedInDB( trx: db.KnexReadWriteTransaction, filename: string ) { - console.log("Checking to see if the DB has the Cloudflare ID...") const cloudflareId = await trx .raw<{ cloudflareId: string }[][]>( `-- sql - SELECT cloudflareId FROM images WHERE filename = ? - `, + SELECT cloudflareId FROM images WHERE filename = ?`, [filename] ) .then((res) => res[0][0]?.cloudflareId) if (!cloudflareId) { - console.log("No Cloudflare ID found in the DB.") + console.log("Not tracked in DB:", filename) return false } else { - console.log(`Cloudflare ID "${cloudflareId}" exists in the DB.`) + console.log("Already tracked in DB:", filename) return true } } @@ -255,9 +245,9 @@ async function updateDbWithCloudflareId( console.log("Updating the DB with the Cloudflare ID...") await trx.raw( `-- sql - UPDATE images - SET cloudflareId = ? - WHERE filename = ?`, + UPDATE images + SET cloudflareId = ? + WHERE filename = ?`, [cloudflareId, filename] ) } @@ -292,25 +282,24 @@ async function uploadImageToCloudflareImages( } const imageUrl = `${IMAGE_HOSTING_R2_CDN_URL}/production/${filename}` - console.log("Downloading image:", filename) - const imageBuffer = await fetch(imageUrl).then((res) => res.arrayBuffer()) const metadata = stringifyImageMetadata(image) - const isInvalid = validateImage(Buffer.from(imageBuffer), metadata) - if (isInvalid) { - console.log(`Image "${filename}" is invalid: ${isInvalid}`) + const invalidReason = validateImage(image, metadata) + if (invalidReason) { + console.log("Image invalid:", filename) invalidImages.push({ filename, - reason: isInvalid, + reason: invalidReason, }) return } const formData = new FormData() formData.append("url", imageUrl) + formData.append("id", encodeURIComponent(filename)) formData.append("metadata", metadata) formData.append("requireSignedURLs", "false") - console.log("Uploading image to Cloudflare Images...") + console.log("Uploading image:", filename) const uploadResults = await fetch( `https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1`, { @@ -320,7 +309,16 @@ async function uploadImageToCloudflareImages( }, body: formData, } - ).then((res) => res.json()) + ) + .then((res) => res.json()) + .then((res: CloudflareAPIUploadResponse) => { + if (res.success) { + console.log("Upload complete:", filename) + } else { + console.error("Upload error:", filename, res.errors) + } + return res + }) if (!uploadResults || uploadResults.errors.length) { invalidImages.push({ @@ -333,10 +331,10 @@ async function uploadImageToCloudflareImages( await trx.raw( `-- sql - UPDATE images - SET cloudflareId = ? - WHERE googleId = ?`, - [uploadResults.result.id, image.googleId] + UPDATE images + SET cloudflareId = ? + WHERE filename = ?`, + [uploadResults.result.id, filename] ) } @@ -391,6 +389,7 @@ async function uploadImagesToCloudflareImages( const invalidImages: ImageValidationObject[] = [] const images = await fetchImagesFromDatabase(trx) + console.log(`${images.length} images fetched.`) await pMap( @@ -413,7 +412,7 @@ async function uploadImagesToCloudflareImages( }) } }, - { concurrency: 10 } + { concurrency: 6 } ) console.log("Finished!") diff --git a/package.json b/package.json index 76707ebe367..53ba5566e67 100644 --- a/package.json +++ b/package.json @@ -190,7 +190,6 @@ "@types/fs-extra": "^11.0.1", "@types/geojson": "^7946.0.10", "@types/html-to-text": "^9.0.4", - "@types/image-size": "^0.8.0", "@types/indefinite": "^2.3.2", "@types/ini": "^4", "@types/js-cookie": "^3.0.2", @@ -234,7 +233,6 @@ "flag-icons": "^7.2.3", "http-server": "^14.1.1", "husky": "^9.0.11", - "image-size": "^1.1.1", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", "lerna": "^8.1.6", diff --git a/yarn.lock b/yarn.lock index 4bf31fa42a1..14308cf0d1d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5270,15 +5270,6 @@ __metadata: languageName: node linkType: hard -"@types/image-size@npm:^0.8.0": - version: 0.8.0 - resolution: "@types/image-size@npm:0.8.0" - dependencies: - image-size: "npm:*" - checksum: 10/9530adc7515609f801d37d4db80f883855d7e5ba6c593f3705b6d54550438a97822937d416bc27e09237b2c610e692cce3cf59ff3105d9c4bb8c91c13ba269b6 - languageName: node - linkType: hard - "@types/indefinite@npm:^2.3.2": version: 2.3.2 resolution: "@types/indefinite@npm:2.3.2" @@ -11082,7 +11073,6 @@ __metadata: "@types/fs-extra": "npm:^11.0.1" "@types/geojson": "npm:^7946.0.10" "@types/html-to-text": "npm:^9.0.4" - "@types/image-size": "npm:^0.8.0" "@types/indefinite": "npm:^2.3.2" "@types/ini": "npm:^4" "@types/js-cookie": "npm:^3.0.2" @@ -11157,7 +11147,6 @@ __metadata: html-to-text: "npm:^9.0.5" http-server: "npm:^14.1.1" husky: "npm:^9.0.11" - image-size: "npm:^1.1.1" indefinite: "npm:^2.4.3" ini: "npm:^4.1.2" instantsearch.js: "npm:^4.72.1" @@ -11739,17 +11728,6 @@ __metadata: languageName: node linkType: hard -"image-size@npm:*, image-size@npm:^1.1.1": - version: 1.1.1 - resolution: "image-size@npm:1.1.1" - dependencies: - queue: "npm:6.0.2" - bin: - image-size: bin/image-size.js - checksum: 10/f28966dd3f6d4feccc4028400bb7e8047c28b073ab0aa90c7c53039288139dd416c6bc254a976d4bf61113d4bc84871786804113099701cbfe9ccf377effdb54 - languageName: node - linkType: hard - "immutable@npm:^4.0.0, immutable@npm:^4.3.6": version: 4.3.6 resolution: "immutable@npm:4.3.6" @@ -16333,15 +16311,6 @@ __metadata: languageName: node linkType: hard -"queue@npm:6.0.2": - version: 6.0.2 - resolution: "queue@npm:6.0.2" - dependencies: - inherits: "npm:~2.0.3" - checksum: 10/3437954ef1442c86ff01a0fbe3dc6222838823b1ca97f37eff651bc20b868c0c2904424ef2c0d44cba46055f54b578f92866e573125dc9a5e8823d751e4d1585 - languageName: node - linkType: hard - "quick-lru@npm:^4.0.1": version: 4.0.1 resolution: "quick-lru@npm:4.0.1" From 681db2b1bdfd47a55b20d4fe595b2b16432a1b88 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Fri, 15 Nov 2024 17:16:50 -0500 Subject: [PATCH 05/40] =?UTF-8?q?=F0=9F=8E=89=20add=20admin=20for=20managi?= =?UTF-8?q?ng=20cloudflare=20images?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteClient/AdminApp.tsx | 2 + adminSiteClient/AdminSidebar.tsx | 6 + adminSiteClient/ImagesIndexPage.tsx | 260 ++++++++++++++++++++++++++++ adminSiteServer/apiRouter.ts | 184 ++++++++++++++++++++ db/db.ts | 13 ++ 5 files changed, 465 insertions(+) create mode 100644 adminSiteClient/ImagesIndexPage.tsx diff --git a/adminSiteClient/AdminApp.tsx b/adminSiteClient/AdminApp.tsx index e39777a31a4..04adf0d3878 100644 --- a/adminSiteClient/AdminApp.tsx +++ b/adminSiteClient/AdminApp.tsx @@ -44,6 +44,7 @@ import { GdocsStoreProvider } from "./GdocsStore.js" import { IndicatorChartEditorPage } from "./IndicatorChartEditorPage.js" import { ChartViewEditorPage } from "./ChartViewEditorPage.js" import { ChartViewIndexPage } from "./ChartViewIndexPage.js" +import { ImageIndexPage } from "./ImagesIndexPage.js" @observer class AdminErrorMessage extends React.Component<{ admin: Admin }> { @@ -175,6 +176,7 @@ export class AdminApp extends React.Component<{ /> )} /> + ( Google Docs +
  • + + Images + +
  • Explorers diff --git a/adminSiteClient/ImagesIndexPage.tsx b/adminSiteClient/ImagesIndexPage.tsx new file mode 100644 index 00000000000..90e7787dfb0 --- /dev/null +++ b/adminSiteClient/ImagesIndexPage.tsx @@ -0,0 +1,260 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useState, +} from "react" +import { Button, Flex, Input, Space, Table, Upload } from "antd" + +import { AdminLayout } from "./AdminLayout.js" +import { AdminAppContext } from "./AdminAppContext.js" +import { DbEnrichedImage } from "@ourworldindata/types" +import { Timeago } from "./Forms.js" +import { ColumnsType } from "antd/es/table/InternalTable.js" +import { FontAwesomeIcon } from "@fortawesome/react-fontawesome" +import { faUpload } from "@fortawesome/free-solid-svg-icons" +import { Admin } from "./Admin.js" +import { RcFile } from "antd/es/upload/interface.js" +import TextArea from "antd/es/input/TextArea.js" + +type ImageEditorApi = { + patchImage: ( + image: DbEnrichedImage, + patch: Partial + ) => void + deleteImage: (image: DbEnrichedImage) => void + getImages: () => Promise +} + +function AltTextEditor({ + image, + text, + patchImage, +}: { + image: DbEnrichedImage + text: string + patchImage: ImageEditorApi["patchImage"] +}) { + const [value, setValue] = useState(text) + + const handleBlur = useCallback( + (e: React.FocusEvent) => { + const trimmed = e.target.value.trim() + setValue(trimmed) + if (trimmed !== text) { + patchImage(image, { defaultAlt: trimmed }) + } + }, + [image, text, patchImage] + ) + + return ( +