Skip to content

Commit

Permalink
fix: resolve crash caused by images being overwritten with HTML
Browse files Browse the repository at this point in the history
This is backwards compatible and should only trigger with the
imageQuizHtmlToAnki card option.
  • Loading branch information
aalemayhu committed Dec 21, 2024
1 parent 0958f0a commit 94d0e17
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 22 deletions.
24 changes: 16 additions & 8 deletions src/lib/parser/DeckParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ import { embedFile } from './exporters/embedFile';
import getYouTubeEmbedLink from './helpers/getYouTubeEmbedLink';
import getYouTubeID from './helpers/getYouTubeID';
import { isFileNameEqual } from '../storage/types';
import { isImageFileEmbedable, isMarkdownFile } from '../storage/checks';
import {
isHTMLFile,
isImageFileEmbedable,
isMarkdownFile,
} from '../storage/checks';
import { getFileContents } from './getFileContents';
import { handleNestedBulletPointsInMarkdown } from './handleNestedBulletPointsInMarkdown';
import { checkFlashcardsLimits } from '../User/checkFlashcardsLimits';
Expand Down Expand Up @@ -60,30 +64,34 @@ export class DeckParser {
this.firstDeckName = input.name;
this.noLimits = input.noLimits;
this.globalTags = null;
this.payload = [];
this.processFirstFile(input.name);
}

const firstFile = this.files.find((file) =>
isFileNameEqual(file, input.name)
);
processFirstFile(name: string) {
const firstFile = this.files.find((file) => isFileNameEqual(file, name));

if (this.settings.nestedBulletPoints && isMarkdownFile(input.name)) {
if (this.settings.nestedBulletPoints && isMarkdownFile(name)) {
const contents = getFileContents(firstFile, false);
this.payload = handleNestedBulletPointsInMarkdown(
input.name,
name,
contents?.toString(),
this.settings.deckName,
[],
this.settings
);
} else {
} else if (isHTMLFile(name)) {
const contents = getFileContents(firstFile, true);
this.payload = contents
? this.handleHTML(
input.name,
name,
contents.toString(),
this.settings.deckName || '',
[]
)
: [];
} else {
this.payload = [];
}
}

Expand Down
42 changes: 34 additions & 8 deletions src/lib/parser/PrepareDeck.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import fs from 'fs';

import getDeckFilename from '../anki/getDeckFilename';
import { DeckParser, DeckParserInput } from './DeckParser';
import Deck from './Deck';
import { isImageFile, isPDFFile, isPPTFile } from '../storage/checks';
import {
isHTMLFile,
isImageFile,
isPDFFile,
isPPTFile,
} from '../storage/checks';
import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML';
import { convertPDFToImages } from '../pdf/convertPDFToImages';
import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF';
Expand All @@ -16,6 +23,8 @@ interface PrepareDeckResult {
export async function PrepareDeck(
input: DeckParserInput
): Promise<PrepareDeckResult> {
const convertedImageFiles = [];

for (const file of input.files) {
if (!file.contents) {
continue;
Expand All @@ -26,9 +35,18 @@ export async function PrepareDeck(
input.settings.imageQuizHtmlToAnki &&
input.noLimits
) {
file.contents = await convertImageToHTML(
const convertedImageContents = await convertImageToHTML(
file.contents?.toString('base64')
);
convertedImageFiles.push({
name: `${file.name}.html`,
contents: convertedImageContents,
});

fs.writeFileSync(
`/Users/scanf/Downloads/${file.name}.html`,
convertedImageContents
);
}

if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue;
Expand Down Expand Up @@ -57,15 +75,23 @@ export async function PrepareDeck(
}
}

input.files.push(...convertedImageFiles);
const parser = new DeckParser(input);

if (parser.totalCardCount() === 0) {
const apkg = await parser.tryExperimental(input.workspace);
return {
name: getDeckFilename(parser.name ?? input.name),
apkg,
deck: parser.payload,
};
if (convertedImageFiles.length > 0) {
const htmlFile = convertedImageFiles.find((file) =>
isHTMLFile(file.name)
);
parser.processFirstFile(htmlFile?.name ?? input.name);
} else {
const apkg = await parser.tryExperimental(input.workspace);
return {
name: getDeckFilename(parser.name ?? input.name),
apkg,
deck: parser.payload,
};
}
}

const apkg = await parser.build(input.workspace);
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/experimental/FallbackParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class FallbackParser {
const plainTextParser = new PlainTextParser();
const found = plainTextParser.parse(plainText);
cards = this.mapCardsToNotes(found);
deckName = this.getTitleFromHTML(contents);
deckName = this.getTitleFromHTML(contents) ?? file.name;
} else if (isMarkdownFile(file.name) || isPlainText(file.name)) {
const plainTextParser = new PlainTextParser();
const items = this.getMarkdownBulletLists(contents);
Expand Down
19 changes: 15 additions & 4 deletions src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
import { VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS } from './constants';

/**
 * Google VertexAI usually wraps its answer in a Markdown code fence:
 * ```html
 * [...]
 * ```
 * This strips the opening and closing fence lines so only the HTML remains.
 *
 * The fence is removed only when it is actually present. Content that is not
 * fenced (including the empty string produced when generation fails) is
 * returned unchanged, so real HTML lines are never dropped.
 *
 * @param content Raw text returned by the model.
 * @returns The text between the fence lines, or `content` as-is when unfenced.
 */
function removeFirstAndLastLine(content: string): string {
  // Trim first: a trailing newline after the closing fence would otherwise
  // make the last split element '' and leave the fence in the output.
  const lines = content.trim().split(/\r?\n/);
  const firstLine = lines[0] ?? '';
  const lastLine = lines[lines.length - 1] ?? '';

  const isFenced =
    lines.length >= 2 &&
    firstLine.trimStart().startsWith('```') &&
    lastLine.trim() === '```';

  if (!isFenced) {
    return content;
  }

  return lines.slice(1, -1).join('\n');
}

export const convertImageToHTML = async (
imageData: string
): Promise<string> => {
const vertexAI = new VertexAI({
project: 'notion-to-anki',
location: 'europe-west3',
});
const model = 'gemini-1.5-flash-002';
const model = 'gemini-1.5-pro-002';

const generativeModel = vertexAI.preview.getGenerativeModel({
model: model,
Expand All @@ -21,7 +33,7 @@ export const convertImageToHTML = async (
});

const text1 = {
text: `Convert the text in this image to the following format
text: `Convert the text in this image to the following format for (every question is their own ul):
<ul class=\"toggle\">
  <li>
Expand Down Expand Up @@ -71,6 +83,5 @@ export const convertImageToHTML = async (
} catch (error) {
console.error('Error generating content stream:', error);
}

return htmlContent;
return removeFirstAndLastLine(htmlContent);
};
4 changes: 3 additions & 1 deletion src/usecases/uploads/isZipContentFileSupported.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
isPlainText,
isCSVFile,
isPDFFile,
isImageFile,
} from '../../lib/storage/checks';

/**
Expand All @@ -14,4 +15,5 @@ export const isZipContentFileSupported = (filename: string) =>
isMarkdownFile(filename) ??
isPlainText(filename) ??
isCSVFile(filename) ??
isPDFFile(filename);
isPDFFile(filename) ??
isImageFile(filename);

0 comments on commit 94d0e17

Please sign in to comment.