Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: resolve crash caused by images being overwritten with HTML #1687

Merged
merged 1 commit into from
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions src/lib/parser/DeckParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ import { embedFile } from './exporters/embedFile';
import getYouTubeEmbedLink from './helpers/getYouTubeEmbedLink';
import getYouTubeID from './helpers/getYouTubeID';
import { isFileNameEqual } from '../storage/types';
import { isImageFileEmbedable, isMarkdownFile } from '../storage/checks';
import {
isHTMLFile,
isImageFileEmbedable,
isMarkdownFile,
} from '../storage/checks';
import { getFileContents } from './getFileContents';
import { handleNestedBulletPointsInMarkdown } from './handleNestedBulletPointsInMarkdown';
import { checkFlashcardsLimits } from '../User/checkFlashcardsLimits';
Expand Down Expand Up @@ -60,30 +64,34 @@ export class DeckParser {
this.firstDeckName = input.name;
this.noLimits = input.noLimits;
this.globalTags = null;
this.payload = [];
this.processFirstFile(input.name);
}

const firstFile = this.files.find((file) =>
isFileNameEqual(file, input.name)
);
processFirstFile(name: string) {
const firstFile = this.files.find((file) => isFileNameEqual(file, name));

if (this.settings.nestedBulletPoints && isMarkdownFile(input.name)) {
if (this.settings.nestedBulletPoints && isMarkdownFile(name)) {
const contents = getFileContents(firstFile, false);
this.payload = handleNestedBulletPointsInMarkdown(
input.name,
name,
contents?.toString(),
this.settings.deckName,
[],
this.settings
);
} else {
} else if (isHTMLFile(name)) {
const contents = getFileContents(firstFile, true);
this.payload = contents
? this.handleHTML(
input.name,
name,
contents.toString(),
this.settings.deckName || '',
[]
)
: [];
} else {
this.payload = [];
}
}

Expand Down
35 changes: 27 additions & 8 deletions src/lib/parser/PrepareDeck.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import getDeckFilename from '../anki/getDeckFilename';
import { DeckParser, DeckParserInput } from './DeckParser';
import Deck from './Deck';
import { isImageFile, isPDFFile, isPPTFile } from '../storage/checks';
import {
isHTMLFile,
isImageFile,
isPDFFile,
isPPTFile,
} from '../storage/checks';
import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML';
import { convertPDFToImages } from '../pdf/convertPDFToImages';
import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF';
Expand All @@ -16,6 +21,8 @@ interface PrepareDeckResult {
export async function PrepareDeck(
input: DeckParserInput
): Promise<PrepareDeckResult> {
const convertedImageFiles = [];

for (const file of input.files) {
if (!file.contents) {
continue;
Expand All @@ -26,9 +33,13 @@ export async function PrepareDeck(
input.settings.imageQuizHtmlToAnki &&
input.noLimits
) {
file.contents = await convertImageToHTML(
const convertedImageContents = await convertImageToHTML(
file.contents?.toString('base64')
);
convertedImageFiles.push({
name: `${file.name}.html`,
contents: convertedImageContents,
});
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will remove this debug write out


if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue;
Expand Down Expand Up @@ -57,15 +68,23 @@ export async function PrepareDeck(
}
}

input.files.push(...convertedImageFiles);
const parser = new DeckParser(input);

if (parser.totalCardCount() === 0) {
const apkg = await parser.tryExperimental(input.workspace);
return {
name: getDeckFilename(parser.name ?? input.name),
apkg,
deck: parser.payload,
};
if (convertedImageFiles.length > 0) {
const htmlFile = convertedImageFiles.find((file) =>
isHTMLFile(file.name)
);
parser.processFirstFile(htmlFile?.name ?? input.name);
} else {
const apkg = await parser.tryExperimental(input.workspace);
return {
name: getDeckFilename(parser.name ?? input.name),
apkg,
deck: parser.payload,
};
}
}

const apkg = await parser.build(input.workspace);
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/experimental/FallbackParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class FallbackParser {
const plainTextParser = new PlainTextParser();
const found = plainTextParser.parse(plainText);
cards = this.mapCardsToNotes(found);
deckName = this.getTitleFromHTML(contents);
deckName = this.getTitleFromHTML(contents) ?? file.name;
} else if (isMarkdownFile(file.name) || isPlainText(file.name)) {
const plainTextParser = new PlainTextParser();
const items = this.getMarkdownBulletLists(contents);
Expand Down
6 changes: 4 additions & 2 deletions src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS } from './constants';
import { removeFirstAndLastLine } from './removeFirstAndLastLine';

export const convertImageToHTML = async (
imageData: string
Expand All @@ -8,7 +9,7 @@ export const convertImageToHTML = async (
project: 'notion-to-anki',
location: 'europe-west3',
});
const model = 'gemini-1.5-flash-002';
const model = 'gemini-1.5-pro-002';

const generativeModel = vertexAI.preview.getGenerativeModel({
model: model,
Expand All @@ -21,7 +22,7 @@ export const convertImageToHTML = async (
});

const text1 = {
text: `Convert the text in this image to the following format
text: `Convert the text in this image to the following format for (every question is their own ul):

<ul class=\"toggle\">
  <li>
Expand Down Expand Up @@ -71,6 +72,7 @@ export const convertImageToHTML = async (
} catch (error) {
console.error('Error generating content stream:', error);
}
htmlContent = removeFirstAndLastLine(htmlContent);

return htmlContent;
};
11 changes: 11 additions & 0 deletions src/lib/parser/experimental/VertexAPI/removeFirstAndLastLine.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Strips the Markdown code fence that Google VertexAI wraps around its
 * generated HTML, e.g.:
 * ```html
 * [...]
 * ```
 * The opening fence line (three backticks plus an optional language tag) and
 * the closing fence line are removed only when they are actually present, so
 * a response that is not fenced is returned unchanged instead of losing its
 * first and last line. Surrounding blank lines/whitespace and CRLF line
 * endings are tolerated so a trailing newline after the closing fence does
 * not leave the fence in the output.
 *
 * @param content Raw text returned by the model.
 * @returns The text between the fences joined with `\n`, or `content`
 *   untouched when no fence line is found.
 */
export function removeFirstAndLastLine(content: string): string {
  const fence = '```';
  // Trim first so a trailing newline does not hide the closing fence behind
  // an empty last line.
  const lines = content.trim().split(/\r?\n/);

  const hasOpeningFence = lines[0]?.startsWith(fence) ?? false;
  // A single line cannot be both the opening and the closing fence.
  const hasClosingFence =
    lines.length > 1 && lines[lines.length - 1]?.trim() === fence;

  if (!hasOpeningFence && !hasClosingFence) {
    return content;
  }

  return lines
    .slice(hasOpeningFence ? 1 : 0, hasClosingFence ? -1 : undefined)
    .join('\n');
}
4 changes: 3 additions & 1 deletion src/usecases/uploads/isZipContentFileSupported.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
isPlainText,
isCSVFile,
isPDFFile,
isImageFile,
} from '../../lib/storage/checks';

/**
Expand All @@ -14,4 +15,5 @@ export const isZipContentFileSupported = (filename: string) =>
isMarkdownFile(filename) ??
isPlainText(filename) ??
isCSVFile(filename) ??
isPDFFile(filename);
isPDFFile(filename) ??
isImageFile(filename);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

allow image uploads

Loading