From 94d0e1750ad0281cdc3b9df2930e3451b64cdacb Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Sat, 21 Dec 2024 08:12:25 +0100 Subject: [PATCH] fix: resolve crash caused by images being overwritten with HTML This is backwards compatible and should only trigger with the imageQuizHtmlToAnki card option. --- src/lib/parser/DeckParser.ts | 24 +++++++---- src/lib/parser/PrepareDeck.ts | 42 +++++++++++++++---- src/lib/parser/experimental/FallbackParser.ts | 2 +- .../VertexAPI/convertImageToHTML.ts | 19 +++++++-- .../uploads/isZipContentFileSupported.ts | 4 +- 5 files changed, 69 insertions(+), 22 deletions(-) diff --git a/src/lib/parser/DeckParser.ts b/src/lib/parser/DeckParser.ts index ee15d9a8..31c4d8a3 100644 --- a/src/lib/parser/DeckParser.ts +++ b/src/lib/parser/DeckParser.ts @@ -19,7 +19,11 @@ import { embedFile } from './exporters/embedFile'; import getYouTubeEmbedLink from './helpers/getYouTubeEmbedLink'; import getYouTubeID from './helpers/getYouTubeID'; import { isFileNameEqual } from '../storage/types'; -import { isImageFileEmbedable, isMarkdownFile } from '../storage/checks'; +import { + isHTMLFile, + isImageFileEmbedable, + isMarkdownFile, +} from '../storage/checks'; import { getFileContents } from './getFileContents'; import { handleNestedBulletPointsInMarkdown } from './handleNestedBulletPointsInMarkdown'; import { checkFlashcardsLimits } from '../User/checkFlashcardsLimits'; @@ -60,30 +64,34 @@ export class DeckParser { this.firstDeckName = input.name; this.noLimits = input.noLimits; this.globalTags = null; + this.payload = []; + this.processFirstFile(input.name); + } - const firstFile = this.files.find((file) => - isFileNameEqual(file, input.name) - ); + processFirstFile(name: string) { + const firstFile = this.files.find((file) => isFileNameEqual(file, name)); - if (this.settings.nestedBulletPoints && isMarkdownFile(input.name)) { + if (this.settings.nestedBulletPoints && isMarkdownFile(name)) { const contents = getFileContents(firstFile, false); this.payload = handleNestedBulletPointsInMarkdown( - input.name, + name, contents?.toString(), this.settings.deckName, [], this.settings ); - } else { + } else if (isHTMLFile(name)) { const contents = getFileContents(firstFile, true); this.payload = contents ? this.handleHTML( - input.name, + name, contents.toString(), this.settings.deckName || '', [] ) : []; + } else { + this.payload = []; } } diff --git a/src/lib/parser/PrepareDeck.ts b/src/lib/parser/PrepareDeck.ts index 3092206f..b9cbe4e4 100644 --- a/src/lib/parser/PrepareDeck.ts +++ b/src/lib/parser/PrepareDeck.ts @@ -1,7 +1,14 @@ +import fs from 'fs'; + import getDeckFilename from '../anki/getDeckFilename'; import { DeckParser, DeckParserInput } from './DeckParser'; import Deck from './Deck'; -import { isImageFile, isPDFFile, isPPTFile } from '../storage/checks'; +import { + isHTMLFile, + isImageFile, + isPDFFile, + isPPTFile, +} from '../storage/checks'; import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML'; import { convertPDFToImages } from '../pdf/convertPDFToImages'; import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF'; @@ -16,6 +23,8 @@ interface PrepareDeckResult { export async function PrepareDeck( input: DeckParserInput ): Promise { + const convertedImageFiles = []; + for (const file of input.files) { if (!file.contents) { continue; @@ -26,9 +35,18 @@ export async function PrepareDeck( input.settings.imageQuizHtmlToAnki && input.noLimits ) { - file.contents = await convertImageToHTML( + const convertedImageContents = await convertImageToHTML( file.contents?.toString('base64') ); + convertedImageFiles.push({ + name: `${file.name}.html`, + contents: convertedImageContents, + }); + + fs.writeFileSync( + `/Users/scanf/Downloads/${file.name}.html`, + convertedImageContents + ); } if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue; @@ -57,15 +75,23 @@ export async function PrepareDeck( } } + input.files.push(...convertedImageFiles); const parser = new DeckParser(input); if (parser.totalCardCount() === 0) { - const apkg = await parser.tryExperimental(input.workspace); - return { - name: getDeckFilename(parser.name ?? input.name), - apkg, - deck: parser.payload, - }; + if (convertedImageFiles.length > 0) { + const htmlFile = convertedImageFiles.find((file) => + isHTMLFile(file.name) + ); + parser.processFirstFile(htmlFile?.name ?? input.name); + } else { + const apkg = await parser.tryExperimental(input.workspace); + return { + name: getDeckFilename(parser.name ?? input.name), + apkg, + deck: parser.payload, + }; + } } const apkg = await parser.build(input.workspace); diff --git a/src/lib/parser/experimental/FallbackParser.ts b/src/lib/parser/experimental/FallbackParser.ts index a75cfd16..8236a814 100644 --- a/src/lib/parser/experimental/FallbackParser.ts +++ b/src/lib/parser/experimental/FallbackParser.ts @@ -121,7 +121,7 @@ class FallbackParser { const plainTextParser = new PlainTextParser(); const found = plainTextParser.parse(plainText); cards = this.mapCardsToNotes(found); - deckName = this.getTitleFromHTML(contents); + deckName = this.getTitleFromHTML(contents) ?? file.name; } else if (isMarkdownFile(file.name) || isPlainText(file.name)) { const plainTextParser = new PlainTextParser(); const items = this.getMarkdownBulletLists(contents); diff --git a/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts b/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts index 8e025a80..14ba36a4 100644 --- a/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts +++ b/src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts @@ -1,6 +1,18 @@ import { VertexAI } from '@google-cloud/vertexai'; import { SAFETY_SETTINGS } from './constants'; +/** + * Google VertexAI is returning Markdown: + * ```html + * [...] + * ``` + * So we need to remove the first and last line + */ +function removeFirstAndLastLine(content: string): string { + const lines = content.split('\n'); + return lines.slice(1, -1).join('\n'); +} + export const convertImageToHTML = async ( imageData: string ): Promise => { @@ -8,7 +20,7 @@ export const convertImageToHTML = async ( project: 'notion-to-anki', location: 'europe-west3', }); - const model = 'gemini-1.5-flash-002'; + const model = 'gemini-1.5-pro-002'; const generativeModel = vertexAI.preview.getGenerativeModel({ model: model, @@ -21,7 +33,7 @@ export const convertImageToHTML = async ( }); const text1 = { - text: `Convert the text in this image to the following format:  + text: `Convert the text in this image to the following format for (every question is their own ul):
      
  • @@ -71,6 +83,5 @@ export const convertImageToHTML = async ( } catch (error) { console.error('Error generating content stream:', error); } - - return htmlContent; + return removeFirstAndLastLine(htmlContent); }; diff --git a/src/usecases/uploads/isZipContentFileSupported.ts b/src/usecases/uploads/isZipContentFileSupported.ts index 8f03067c..22291021 100644 --- a/src/usecases/uploads/isZipContentFileSupported.ts +++ b/src/usecases/uploads/isZipContentFileSupported.ts @@ -4,6 +4,7 @@ import { isPlainText, isCSVFile, isPDFFile, + isImageFile, } from '../../lib/storage/checks'; /** @@ -14,4 +15,5 @@ export const isZipContentFileSupported = (filename: string) => isMarkdownFile(filename) ?? isPlainText(filename) ?? isCSVFile(filename) ?? - isPDFFile(filename); + isPDFFile(filename) ?? + isImageFile(filename);