Skip to content

Commit

Permalink
fix: resolve crash caused by images being overwritten with HTML
Browse files Browse the repository at this point in the history
This is backwards compatible and should only trigger with the
imageQuizHtmlToAnki card option.
  • Loading branch information
aalemayhu committed Dec 21, 2024
1 parent 0958f0a commit 5d98afc
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 20 deletions.
24 changes: 16 additions & 8 deletions src/lib/parser/DeckParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ import { embedFile } from './exporters/embedFile';
import getYouTubeEmbedLink from './helpers/getYouTubeEmbedLink';
import getYouTubeID from './helpers/getYouTubeID';
import { isFileNameEqual } from '../storage/types';
import { isImageFileEmbedable, isMarkdownFile } from '../storage/checks';
import {
isHTMLFile,
isImageFileEmbedable,
isMarkdownFile,
} from '../storage/checks';
import { getFileContents } from './getFileContents';
import { handleNestedBulletPointsInMarkdown } from './handleNestedBulletPointsInMarkdown';
import { checkFlashcardsLimits } from '../User/checkFlashcardsLimits';
Expand Down Expand Up @@ -60,30 +64,34 @@ export class DeckParser {
this.firstDeckName = input.name;
this.noLimits = input.noLimits;
this.globalTags = null;
this.payload = [];
this.processFirstFile(input.name);
}

const firstFile = this.files.find((file) =>
isFileNameEqual(file, input.name)
);
processFirstFile(name: string) {
const firstFile = this.files.find((file) => isFileNameEqual(file, name));

if (this.settings.nestedBulletPoints && isMarkdownFile(input.name)) {
if (this.settings.nestedBulletPoints && isMarkdownFile(name)) {
const contents = getFileContents(firstFile, false);
this.payload = handleNestedBulletPointsInMarkdown(
input.name,
name,
contents?.toString(),
this.settings.deckName,
[],
this.settings
);
} else {
} else if (isHTMLFile(name)) {
const contents = getFileContents(firstFile, true);
this.payload = contents
? this.handleHTML(
input.name,
name,
contents.toString(),
this.settings.deckName || '',
[]
)
: [];
} else {
this.payload = [];
}
}

Expand Down
35 changes: 27 additions & 8 deletions src/lib/parser/PrepareDeck.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import getDeckFilename from '../anki/getDeckFilename';
import { DeckParser, DeckParserInput } from './DeckParser';
import Deck from './Deck';
import { isImageFile, isPDFFile, isPPTFile } from '../storage/checks';
import {
isHTMLFile,
isImageFile,
isPDFFile,
isPPTFile,
} from '../storage/checks';
import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML';
import { convertPDFToImages } from '../pdf/convertPDFToImages';
import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF';
Expand All @@ -16,6 +21,8 @@ interface PrepareDeckResult {
export async function PrepareDeck(
input: DeckParserInput
): Promise<PrepareDeckResult> {
const convertedImageFiles = [];

for (const file of input.files) {
if (!file.contents) {
continue;
Expand All @@ -26,9 +33,13 @@ export async function PrepareDeck(
input.settings.imageQuizHtmlToAnki &&
input.noLimits
) {
file.contents = await convertImageToHTML(
const convertedImageContents = await convertImageToHTML(
file.contents?.toString('base64')
);
convertedImageFiles.push({
name: `${file.name}.html`,
contents: convertedImageContents,
});
}

if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue;
Expand Down Expand Up @@ -57,15 +68,23 @@ export async function PrepareDeck(
}
}

input.files.push(...convertedImageFiles);
const parser = new DeckParser(input);

if (parser.totalCardCount() === 0) {
const apkg = await parser.tryExperimental(input.workspace);
return {
name: getDeckFilename(parser.name ?? input.name),
apkg,
deck: parser.payload,
};
if (convertedImageFiles.length > 0) {
const htmlFile = convertedImageFiles.find((file) =>
isHTMLFile(file.name)
);
parser.processFirstFile(htmlFile?.name ?? input.name);
} else {
const apkg = await parser.tryExperimental(input.workspace);
return {
name: getDeckFilename(parser.name ?? input.name),
apkg,
deck: parser.payload,
};
}
}

const apkg = await parser.build(input.workspace);
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/experimental/FallbackParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class FallbackParser {
const plainTextParser = new PlainTextParser();
const found = plainTextParser.parse(plainText);
cards = this.mapCardsToNotes(found);
deckName = this.getTitleFromHTML(contents);
deckName = this.getTitleFromHTML(contents) ?? file.name;
} else if (isMarkdownFile(file.name) || isPlainText(file.name)) {
const plainTextParser = new PlainTextParser();
const items = this.getMarkdownBulletLists(contents);
Expand Down
6 changes: 4 additions & 2 deletions src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS } from './constants';
import { removeFirstAndLastLine } from './removeFirstAndLastLine';

export const convertImageToHTML = async (
imageData: string
Expand All @@ -8,7 +9,7 @@ export const convertImageToHTML = async (
project: 'notion-to-anki',
location: 'europe-west3',
});
const model = 'gemini-1.5-flash-002';
const model = 'gemini-1.5-pro-002';

const generativeModel = vertexAI.preview.getGenerativeModel({
model: model,
Expand All @@ -21,7 +22,7 @@ export const convertImageToHTML = async (
});

const text1 = {
text: `Convert the text in this image to the following format
text: `Convert the text in this image to the following format for (every question is their own ul):
<ul class=\"toggle\">
  <li>
Expand Down Expand Up @@ -71,6 +72,7 @@ export const convertImageToHTML = async (
} catch (error) {
console.error('Error generating content stream:', error);
}
htmlContent = removeFirstAndLastLine(htmlContent);

return htmlContent;
};
11 changes: 11 additions & 0 deletions src/lib/parser/experimental/VertexAPI/removeFirstAndLastLine.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Google VertexAI is returning Markdown:
 * ```html
 * [...]
 * ```
 * So we need to remove the first and last line.
 *
 * The wrapper is only stripped when it is really there: the first line must
 * open a code fence and the last line must close one. Leading and trailing
 * whitespace (for example a newline after the closing fence) is ignored when
 * looking for the fences. Content that is not fenced, including the empty
 * string, is returned unchanged so that valid HTML is never truncated.
 *
 * @param content Raw text returned by the model.
 * @returns The text between the fences, or `content` untouched when it is
 * not wrapped in a fenced code block.
 */
export function removeFirstAndLastLine(content: string): string {
  const fence = '```';
  // Trim first so a trailing newline after the closing fence does not leave
  // an empty "last line" that hides the real fence.
  const lines = content.trim().split('\n');

  const firstLine = lines[0];
  const lastLine = lines[lines.length - 1];
  const isFence = (line: string | undefined): boolean =>
    line !== undefined && line.trim().startsWith(fence);

  // A fenced block needs at least an opening and a closing line.
  if (lines.length < 2 || !isFence(firstLine) || !isFence(lastLine)) {
    return content;
  }

  return lines.slice(1, -1).join('\n');
}
4 changes: 3 additions & 1 deletion src/usecases/uploads/isZipContentFileSupported.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
isPlainText,
isCSVFile,
isPDFFile,
isImageFile,
} from '../../lib/storage/checks';

/**
Expand All @@ -14,4 +15,5 @@ export const isZipContentFileSupported = (filename: string) =>
isMarkdownFile(filename) ??
isPlainText(filename) ??
isCSVFile(filename) ??
isPDFFile(filename);
isPDFFile(filename) ??
isImageFile(filename);

0 comments on commit 5d98afc

Please sign in to comment.