From 538fa4012272f9ed57049743f18a758974e9d2be Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Sun, 8 Dec 2024 18:18:34 +0100 Subject: [PATCH] feat: convert images to flashcards Related-to: https://github.com/2anki/server/issues/1156 Related-to: https://github.com/2anki/server/issues/1483 --- .../SettingsController.test.ts | 27 ++--- .../SettingsController/supportedOptions.ts | 6 + src/lib/parser/PrepareDeck.ts | 18 ++- src/lib/parser/Settings/Settings.ts | 4 + .../experimental/VertexAPI/constants.ts | 20 ++++ .../VertexAPI/convertImageToHTML.ts | 76 ++++++++++++ .../VertexAPI/convertPDFToHTML.ts | 27 +---- src/lib/storage/checks.ts | 9 ++ src/lib/zip/zip.tsx | 113 +++++++++++++----- src/usecases/uploads/getPackagesFromZip.ts | 2 +- src/usecases/uploads/worker.ts | 10 +- 11 files changed, 237 insertions(+), 75 deletions(-) create mode 100644 src/lib/parser/experimental/VertexAPI/constants.ts create mode 100644 src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts diff --git a/src/controllers/SettingsController/SettingsController.test.ts b/src/controllers/SettingsController/SettingsController.test.ts index 282d900c..55d7a8ee 100644 --- a/src/controllers/SettingsController/SettingsController.test.ts +++ b/src/controllers/SettingsController/SettingsController.test.ts @@ -21,15 +21,18 @@ class FakeSettingsService implements IServiceSettings { } } +function testDefaultSettings( + type: 'client' | 'server', + expectedOptions: Record +) { + const settingsController = new SettingsController(new FakeSettingsService()); + const defaultOptions = settingsController.getDefaultSettingsCardOptions(type); + expect(defaultOptions).toStrictEqual(expectedOptions); +} + describe('SettingsController', () => { test('returns default settings for client', () => { - const settingsController = new SettingsController( - new FakeSettingsService() - ); - const defaultOptions = - settingsController.getDefaultSettingsCardOptions('client'); - - expect(defaultOptions).toStrictEqual({ + testDefaultSettings('client', { 'add-notion-link': 'false', 'use-notion-id': 'true', all: 'true', @@ -48,17 +51,12 @@ describe('SettingsController', () => { 'perserve-newlines': 'true', 'vertex-ai-pdf-questions': 'false', 'disable-indented-bullets': 'false', + 'image-quiz-html-to-anki': 'false', }); }); test('returns default settings for server', () => { - const settingsController = new SettingsController( - new FakeSettingsService() - ); - const defaultOptions = - settingsController.getDefaultSettingsCardOptions('server'); - - expect(defaultOptions).toStrictEqual({ + testDefaultSettings('server', { 'add-notion-link': 'false', 'use-notion-id': 'true', all: 'true', @@ -74,6 +72,7 @@ describe('SettingsController', () => { 'max-one-toggle-per-card': 'true', 'perserve-newlines': 'false', 'page-emoji': 'first-emoji', + 'image-quiz-html-to-anki': 'false', }); }); }); diff --git a/src/controllers/SettingsController/supportedOptions.ts b/src/controllers/SettingsController/supportedOptions.ts index 031f328c..1f8cbe4f 100644 --- a/src/controllers/SettingsController/supportedOptions.ts +++ b/src/controllers/SettingsController/supportedOptions.ts @@ -130,6 +130,12 @@ const supportedOptions = (): CardOption[] => { 'Disable indented bullets from becoming separate cards. This applies to bullet lists.', false ), + new CardOption( + 'image-quiz-html-to-anki', + 'Convert Image Quiz HTML to Anki Cards', + 'Use OCR to extract images and answers from HTML quizzes and convert them into Anki flashcards for review. This is a premium experimental feature.', + false + ), ]; return v.filter(Boolean); diff --git a/src/lib/parser/PrepareDeck.ts b/src/lib/parser/PrepareDeck.ts index f46a2da0..3092206f 100644 --- a/src/lib/parser/PrepareDeck.ts +++ b/src/lib/parser/PrepareDeck.ts @@ -1,10 +1,11 @@ import getDeckFilename from '../anki/getDeckFilename'; import { DeckParser, DeckParserInput } from './DeckParser'; import Deck from './Deck'; -import { isPDFFile, isPPTFile } from '../storage/checks'; +import { isImageFile, isPDFFile, isPPTFile } from '../storage/checks'; import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML'; import { convertPDFToImages } from '../pdf/convertPDFToImages'; import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF'; +import { convertImageToHTML } from './experimental/VertexAPI/convertImageToHTML'; interface PrepareDeckResult { name: string; @@ -16,8 +17,21 @@ export async function PrepareDeck( input: DeckParserInput ): Promise { for (const file of input.files) { - if ((!isPDFFile(file.name) && !isPPTFile(file.name)) || !file.contents) + if (!file.contents) { continue; + } + + if ( + isImageFile(file.name) && + input.settings.imageQuizHtmlToAnki && + input.noLimits + ) { + file.contents = await convertImageToHTML( + file.contents?.toString('base64') + ); + } + + if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue; if ( isPDFFile(file.name) && diff --git a/src/lib/parser/Settings/Settings.ts b/src/lib/parser/Settings/Settings.ts index 08cae47f..2d628d42 100644 --- a/src/lib/parser/Settings/Settings.ts +++ b/src/lib/parser/Settings/Settings.ts @@ -69,6 +69,8 @@ export class Settings { readonly vertexAIPDFQuestions: boolean; readonly disableIndentedBulletPoints: boolean; + readonly imageQuizHtmlToAnki: boolean; + constructor(input: { [key: string]: string }) { this.deckName = input.deckName; if (this.deckName && !this.deckName.trim()) { @@ -103,6 +105,7 @@ export class Settings { this.vertexAIPDFQuestions = input['vertex-ai-pdf-questions'] === 'true'; this.disableIndentedBulletPoints = input['disable-indented-bullets'] === 'true'; + this.imageQuizHtmlToAnki = input['image-quiz-html-to-anki'] === 'true'; /* Is this really needed? */ if (this.parentBlockId) { this.addNotionLink = true; @@ -143,6 +146,7 @@ export class Settings { 'max-one-toggle-per-card': 'true', 'perserve-newlines': 'false', 'page-emoji': 'first-emoji', + 'image-quiz-html-to-anki': 'false', }; } } diff --git a/src/lib/parser/experimental/VertexAPI/constants.ts b/src/lib/parser/experimental/VertexAPI/constants.ts new file mode 100644 index 00000000..56f0972f --- /dev/null +++ b/src/lib/parser/experimental/VertexAPI/constants.ts @@ -0,0 +1,20 @@ +import { HarmBlockThreshold, HarmCategory } from '@google-cloud/vertexai'; + +export const SAFETY_SETTINGS = [ + { + category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, + threshold: HarmBlockThreshold.BLOCK_NONE, + }, + { + category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + threshold: HarmBlockThreshold.BLOCK_NONE, + }, + { + category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, + threshold: HarmBlockThreshold.BLOCK_NONE, + }, + { + category: HarmCategory.HARM_CATEGORY_HARASSMENT, + threshold: HarmBlockThreshold.BLOCK_NONE, + }, +]; diff --git a/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts b/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts new file mode 100644 index 00000000..8e025a80 --- /dev/null +++ b/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts @@ -0,0 +1,76 @@ +import { VertexAI } from '@google-cloud/vertexai'; +import { SAFETY_SETTINGS } from './constants'; + +export const convertImageToHTML = async ( + imageData: string +): Promise => { + const vertexAI = new VertexAI({ + project: 'notion-to-anki', + location: 'europe-west3', + }); + const model = 'gemini-1.5-flash-002'; + + const generativeModel = vertexAI.preview.getGenerativeModel({ + model: model, + generationConfig: { + maxOutputTokens: 8192, + temperature: 1, + topP: 0.95, + }, + safetySettings: SAFETY_SETTINGS, + }); + + const text1 = { + text: `Convert the text in this image to the following format:  + +
    +   
  • +    
    +      + n) question +      +

    A) ...,

    +

    B)...

    + etc.  +

    and finally Answer: D

    +    
    +   
  • +   
+ + — + - Extra rules: n=is the number for the question, question=the question text + - Add newline between the options + - If you are not able to detect the pattern above, try converting this into a question and answer format`, + }; + + const image1 = { + inlineData: { + mimeType: 'image/png', + data: imageData, + }, + }; + + const req = { + contents: [{ role: 'user', parts: [text1, image1] }], + }; + + let htmlContent = ''; + try { + const streamingResp = await generativeModel.generateContentStream(req); + for await (const item of streamingResp.stream) { + if ( + item.candidates && + item.candidates[0].content && + item.candidates[0].content.parts + ) { + htmlContent += item.candidates[0].content.parts + .map((part) => part.text) + .join(''); + } + } + } catch (error) { + console.error('Error generating content stream:', error); + } + + return htmlContent; +}; diff --git a/src/lib/parser/experimental/VertexAPI/convertPDFToHTML.ts b/src/lib/parser/experimental/VertexAPI/convertPDFToHTML.ts index aba9d3b5..972b9974 100644 --- a/src/lib/parser/experimental/VertexAPI/convertPDFToHTML.ts +++ b/src/lib/parser/experimental/VertexAPI/convertPDFToHTML.ts @@ -1,12 +1,8 @@ import path from 'path'; import fs from 'fs'; -import { - GenerateContentRequest, - HarmBlockThreshold, - HarmCategory, - VertexAI, -} from '@google-cloud/vertexai'; +import { GenerateContentRequest, VertexAI } from '@google-cloud/vertexai'; +import { SAFETY_SETTINGS } from './constants'; export const convertPDFToHTML = async (pdf: string): Promise => { const vertexAI = new VertexAI({ @@ -21,24 +17,7 @@ export const convertPDFToHTML = async (pdf: string): Promise => { temperature: 1, topP: 0.95, }, - safetySettings: [ - { - category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, - threshold: HarmBlockThreshold.BLOCK_NONE, - }, - { - category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, - threshold: HarmBlockThreshold.BLOCK_NONE, - }, - { - category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, - threshold: HarmBlockThreshold.BLOCK_NONE, - }, - { - category: HarmCategory.HARM_CATEGORY_HARASSMENT, - threshold: HarmBlockThreshold.BLOCK_NONE, - }, - ], + safetySettings: SAFETY_SETTINGS, }); const document1 = { diff --git a/src/lib/storage/checks.ts b/src/lib/storage/checks.ts index b4b6d5ef..0e886de2 100644 --- a/src/lib/storage/checks.ts +++ b/src/lib/storage/checks.ts @@ -42,3 +42,12 @@ export const isPotentialZipFile = ( } return filename.trim().endsWith('.') || !filename.includes('.'); }; + +export const isImageFile = (name: string) => + isImageFileEmbedable(name) && + (name.toLowerCase().endsWith('.png') || + name.toLowerCase().endsWith('.jpg') || + name.toLowerCase().endsWith('.jpeg') || + name.toLowerCase().endsWith('.gif') || + name.toLowerCase().endsWith('.bmp') || + name.toLowerCase().endsWith('.svg')); diff --git a/src/lib/zip/zip.tsx b/src/lib/zip/zip.tsx index b4387f2d..28c6636c 100644 --- a/src/lib/zip/zip.tsx +++ b/src/lib/zip/zip.tsx @@ -2,8 +2,16 @@ import { strFromU8, unzipSync } from 'fflate'; import { Body } from 'aws-sdk/clients/s3'; import { renderToStaticMarkup } from 'react-dom/server'; import { getUploadLimits } from '../misc/getUploadLimits'; -import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks'; +import { + isHTMLFile, + isImageFile, + isMarkdownFile, + isPDFFile, +} from '../storage/checks'; import { processAndPrepareArchiveData } from './fallback/processAndPrepareArchiveData'; +import { convertImageToHTML } from '../parser/experimental/VertexAPI/convertImageToHTML'; +import Settings from '../parser/Settings'; +import { getRandomUUID } from '../../shared/helpers/getRandomUUID'; interface File { name: string; @@ -14,14 +22,16 @@ class ZipHandler { files: File[]; zipFileCount: number; maxZipFiles: number; + combinedHTML: string; constructor(maxNestedZipFiles: number) { this.files = []; this.zipFileCount = 0; this.maxZipFiles = maxNestedZipFiles; + this.combinedHTML = ''; } - async build(zipData: Uint8Array, paying: boolean) { + async build(zipData: Uint8Array, paying: boolean, settings: Settings) { const size = Buffer.byteLength(zipData); const limits = getUploadLimits(paying); @@ -38,50 +48,89 @@ class ZipHandler { ); } - await this.processZip(zipData, paying); + await this.processZip(zipData, paying, settings); } - private async processZip(zipData: Uint8Array, paying: boolean) { + private async processZip( + zipData: Uint8Array, + paying: boolean, + settings: Settings + ) { if (this.zipFileCount >= this.maxZipFiles) { throw new Error('Too many zip files in the upload.'); } try { const loadedZip = unzipSync(zipData, { - filter(file) { - return !file.name.endsWith('/'); - }, + filter: (file) => !file.name.endsWith('/'), }); for (const name in loadedZip) { const file = loadedZip[name]; - let contents = file; - - if (name.includes('__MACOSX/') || isPDFFile(name)) { - continue; - } - - if (name.endsWith('.zip')) { - this.zipFileCount++; - await this.processZip(file, paying); - } else if ((isHTMLFile(name) || isMarkdownFile(name)) && contents) { - this.files.push({ name, contents: strFromU8(file) }); - } else if (contents) { - this.files.push({ name, contents }); - } + await this.handleFile(name, file, paying, settings); } + + this.addCombinedHTMLToFiles(paying, settings); } catch (error: unknown) { - // Code 13 indicates we need to use fallback archive processing - const isArchiveProcessingError = (error as { code?: number }).code === 13; - - if (isArchiveProcessingError) { - // Use fallback method to process archive - const foundFiles = await processAndPrepareArchiveData(zipData, paying); - this.files.push(...foundFiles); - console.log('Processed files using fallback method:', this.files); - } else { - throw error; - } + this.handleZipError(error, zipData, paying); + } + } + + private async handleFile( + name: string, + file: Uint8Array, + paying: boolean, + settings: Settings + ) { + if (name.includes('__MACOSX/') || isPDFFile(name)) return; + + if (name.endsWith('.zip')) { + this.zipFileCount++; + await this.processZip(file, paying, settings); + } else if (isHTMLFile(name) || isMarkdownFile(name)) { + this.files.push({ name, contents: strFromU8(file) }); + } else if (paying && settings.imageQuizHtmlToAnki && isImageFile(name)) { + await this.convertAndAddImageToHTML(name, file); + } else { + this.files.push({ name, contents: file }); + } + } + + private async convertAndAddImageToHTML(name: string, file: Uint8Array) { + const html = await convertImageToHTML(Buffer.from(file).toString('base64')); + this.combinedHTML += html; + console.log('Converted image to HTML:', name, html); + } + + private addCombinedHTMLToFiles(paying: boolean, settings: Settings) { + if (this.combinedHTML && paying) { + const finalHTML = ` + +${settings.deckName ?? 'Image Quiz'} + +${this.combinedHTML} + +`; + this.files.push({ + name: `ocr-${getRandomUUID()}.html`, + contents: finalHTML, + }); + } + } + + private async handleZipError( + error: unknown, + zipData: Uint8Array, + paying: boolean + ) { + const isArchiveProcessingError = (error as { code?: number }).code === 13; + + if (isArchiveProcessingError) { + const foundFiles = await processAndPrepareArchiveData(zipData, paying); + this.files.push(...foundFiles); + console.log('Processed files using fallback method:', this.files); + } else { + throw error; } } diff --git a/src/usecases/uploads/getPackagesFromZip.ts b/src/usecases/uploads/getPackagesFromZip.ts index a7f020da..bd25d258 100644 --- a/src/usecases/uploads/getPackagesFromZip.ts +++ b/src/usecases/uploads/getPackagesFromZip.ts @@ -23,7 +23,7 @@ export const getPackagesFromZip = async ( return { packages: [] }; } - await zipHandler.build(fileContents as Uint8Array, paying); + await zipHandler.build(fileContents as Uint8Array, paying, settings); const fileNames = zipHandler.getFileNames(); diff --git a/src/usecases/uploads/worker.ts b/src/usecases/uploads/worker.ts index 6aaa656d..bd977393 100644 --- a/src/usecases/uploads/worker.ts +++ b/src/usecases/uploads/worker.ts @@ -4,7 +4,11 @@ import Settings from '../../lib/parser/Settings'; import Package from '../../lib/parser/Package'; import fs from 'fs'; import { PrepareDeck } from '../../lib/parser/PrepareDeck'; -import { isPotentialZipFile, isZIPFile } from '../../lib/storage/checks'; +import { + isImageFile, + isPotentialZipFile, + isZIPFile, +} from '../../lib/storage/checks'; import { getPackagesFromZip } from './getPackagesFromZip'; import Workspace from '../../lib/parser/WorkSpace'; import { isZipContentFileSupported } from './isZipContentFileSupported'; @@ -28,7 +32,9 @@ function doGenerationWork(data: GenerationData) { const filename = file.originalname; const key = file.key; - if (isZipContentFileSupported(filename)) { + const allowImageQuizHtmlToAnki = + paying && settings.imageQuizHtmlToAnki && isImageFile(filename); + if (isZipContentFileSupported(filename) || allowImageQuizHtmlToAnki) { const d = await PrepareDeck({ name: filename, files: [{ name: filename, contents: fileContents }],