Skip to content

Commit

Permalink
feat: convert images to flashcards
Browse files Browse the repository at this point in the history
Related-to: #1156
Related-to: #1483
  • Loading branch information
aalemayhu committed Dec 8, 2024
1 parent 6e988b1 commit bd8c86c
Show file tree
Hide file tree
Showing 10 changed files with 223 additions and 37 deletions.
2 changes: 2 additions & 0 deletions src/controllers/SettingsController/SettingsController.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ describe('SettingsController', () => {
'perserve-newlines': 'true',
'vertex-ai-pdf-questions': 'false',
'disable-indented-bullets': 'false',
'image-quiz-html-to-anki': 'false',
});
});

Expand All @@ -74,6 +75,7 @@ describe('SettingsController', () => {
'max-one-toggle-per-card': 'true',
'perserve-newlines': 'false',
'page-emoji': 'first-emoji',
'image-quiz-html-to-anki': 'false',
});
});
});
6 changes: 6 additions & 0 deletions src/controllers/SettingsController/supportedOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ const supportedOptions = (): CardOption[] => {
'Disable indented bullets from becoming separate cards. This applies to bullet lists.',
false
),
new CardOption(
'image-quiz-html-to-anki',
'Convert Image Quiz HTML to Anki Cards',
'Use OCR to extract images and answers from HTML quizzes and convert them into Anki flashcards for review. This is a premium feature.',
false
),
];

return v.filter(Boolean);
Expand Down
18 changes: 16 additions & 2 deletions src/lib/parser/PrepareDeck.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import getDeckFilename from '../anki/getDeckFilename';
import { DeckParser, DeckParserInput } from './DeckParser';
import Deck from './Deck';
import { isPDFFile, isPPTFile } from '../storage/checks';
import { isImageFile, isPDFFile, isPPTFile } from '../storage/checks';
import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML';
import { convertPDFToImages } from '../pdf/convertPDFToImages';
import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF';
import { convertImageToHTML } from './experimental/VertexAPI/convertImageToHTML';

interface PrepareDeckResult {
name: string;
Expand All @@ -16,8 +17,21 @@ export async function PrepareDeck(
input: DeckParserInput
): Promise<PrepareDeckResult> {
for (const file of input.files) {
if ((!isPDFFile(file.name) && !isPPTFile(file.name)) || !file.contents)
if (!file.contents) {
continue;
}

if (
isImageFile(file.name) &&
input.settings.imageQuizHtmlToAnki &&
input.noLimits
) {
file.contents = await convertImageToHTML(
file.contents?.toString('base64')
);
}

if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue;

if (
isPDFFile(file.name) &&
Expand Down
4 changes: 4 additions & 0 deletions src/lib/parser/Settings/Settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ export class Settings {
readonly vertexAIPDFQuestions: boolean;
readonly disableIndentedBulletPoints: boolean;

readonly imageQuizHtmlToAnki: boolean;

constructor(input: { [key: string]: string }) {
this.deckName = input.deckName;
if (this.deckName && !this.deckName.trim()) {
Expand Down Expand Up @@ -103,6 +105,7 @@ export class Settings {
this.vertexAIPDFQuestions = input['vertex-ai-pdf-questions'] === 'true';
this.disableIndentedBulletPoints =
input['disable-indented-bullets'] === 'true';
this.imageQuizHtmlToAnki = input['image-quiz-html-to-anki'] === 'true';
/* Is this really needed? */
if (this.parentBlockId) {
this.addNotionLink = true;
Expand Down Expand Up @@ -143,6 +146,7 @@ export class Settings {
'max-one-toggle-per-card': 'true',
'perserve-newlines': 'false',
'page-emoji': 'first-emoji',
'image-quiz-html-to-anki': 'false',
};
}
}
20 changes: 20 additions & 0 deletions src/lib/parser/experimental/VertexAPI/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { HarmBlockThreshold, HarmCategory } from '@google-cloud/vertexai';

/**
 * Safety settings handed to the Vertex AI generative model. Each harm
 * category listed below is paired with the BLOCK_NONE threshold, in the
 * order the categories appear here.
 */
export const SAFETY_SETTINGS = [
  HarmCategory.HARM_CATEGORY_HATE_SPEECH,
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
  HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
  HarmCategory.HARM_CATEGORY_HARASSMENT,
].map((category) => ({
  category,
  threshold: HarmBlockThreshold.BLOCK_NONE,
}));
76 changes: 76 additions & 0 deletions src/lib/parser/experimental/VertexAPI/convertImageToHTML.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import { VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS } from './constants';

/**
 * Sends an image to the Gemini model on Vertex AI and asks it to transcribe
 * the quiz shown in the image into toggle-list HTML.
 *
 * @param imageData - The image bytes encoded as a base64 string.
 * @param mimeType - Optional MIME type to declare for the image. When
 *   omitted, data starting with the JPEG signature is declared as
 *   `image/jpeg`; everything else is declared as `image/png`, which is what
 *   this function always sent before the parameter existed.
 * @returns The text of every streamed response part concatenated together.
 *   Request failures are logged rather than rethrown, so the result may be
 *   empty or partial.
 */
export const convertImageToHTML = async (
  imageData: string,
  mimeType?: string
): Promise<string> => {
  const vertexAI = new VertexAI({
    project: 'notion-to-anki',
    location: 'europe-west3',
  });
  const model = 'gemini-1.5-flash-002';

  const generativeModel = vertexAI.preview.getGenerativeModel({
    model,
    generationConfig: {
      maxOutputTokens: 8192,
      temperature: 1,
      topP: 0.95,
    },
    safetySettings: SAFETY_SETTINGS,
  });

  const text1 = {
    text: `Convert the text in this image to the following format: 
<ul class=\"toggle\">
  <li>
   <details>
    <summary>
n) question
    </summary>
<p>A) ..., </p>
<p>B)... </p>
etc. 
<p>and finally Answer: D</p>
   </details>
  </li>
  </ul>
- Extra rules: n=is the number for the question, question=the question text
- Add newline between the options
- If you are not able to detect the pattern above, try converting this into a question and answer format`,
  };

  // The bytes FF D8 FF that open a JPEG stream encode to "/9j/" in base64.
  // Callers also pass .jpg/.jpeg files, so avoid mislabelling them as PNG.
  const detectedMimeType = imageData.startsWith('/9j/')
    ? 'image/jpeg'
    : 'image/png';

  const image1 = {
    inlineData: {
      mimeType: mimeType ?? detectedMimeType,
      data: imageData,
    },
  };

  const req = {
    contents: [{ role: 'user', parts: [text1, image1] }],
  };

  let htmlContent = '';
  try {
    const streamingResp = await generativeModel.generateContentStream(req);
    for await (const item of streamingResp.stream) {
      // A chunk may arrive without candidates or content; skip it instead of
      // throwing, which would abort the remaining chunks of the stream.
      const parts = item.candidates?.[0]?.content?.parts;
      if (parts) {
        htmlContent += parts.map((part) => part.text ?? '').join('');
      }
    }
  } catch (error) {
    // Best effort: keep whatever was streamed so far and report the failure.
    console.error('Error generating content stream:', error);
  }

  return htmlContent;
};
9 changes: 9 additions & 0 deletions src/lib/storage/checks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,12 @@ export const isPotentialZipFile = (
}
return filename.trim().endsWith('.') || !filename.includes('.');
};

/**
 * Reports whether `name` looks like an image file: it has to pass
 * `isImageFileEmbedable` and, compared case-insensitively, end with one of
 * the extensions listed below.
 */
export const isImageFile = (name: string) =>
  isImageFileEmbedable(name) &&
  ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.svg'].some((extension) =>
    name.toLowerCase().endsWith(extension)
  );
113 changes: 81 additions & 32 deletions src/lib/zip/zip.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,16 @@ import { strFromU8, unzipSync } from 'fflate';
import { Body } from 'aws-sdk/clients/s3';
import { renderToStaticMarkup } from 'react-dom/server';
import { getUploadLimits } from '../misc/getUploadLimits';
import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks';
import {
isHTMLFile,
isImageFile,
isMarkdownFile,
isPDFFile,
} from '../storage/checks';
import { processAndPrepareArchiveData } from './fallback/processAndPrepareArchiveData';
import { convertImageToHTML } from '../parser/experimental/VertexAPI/convertImageToHTML';
import Settings from '../parser/Settings';
import { getRandomUUID } from '../../shared/helpers/getRandomUUID';

interface File {
name: string;
Expand All @@ -14,14 +22,16 @@ class ZipHandler {
files: File[];
zipFileCount: number;
maxZipFiles: number;
combinedHTML: string;

/**
 * Creates a handler with no collected files and an empty OCR buffer.
 *
 * @param maxNestedZipFiles - Stored as `maxZipFiles`, the limit that
 *   `zipFileCount` is compared against before an archive is processed.
 */
constructor(maxNestedZipFiles: number) {
this.files = [];
this.zipFileCount = 0;
this.maxZipFiles = maxNestedZipFiles;
// Accumulates the HTML produced from converted images.
this.combinedHTML = '';
}

async build(zipData: Uint8Array, paying: boolean) {
async build(zipData: Uint8Array, paying: boolean, settings: Settings) {
const size = Buffer.byteLength(zipData);
const limits = getUploadLimits(paying);

Expand All @@ -38,50 +48,89 @@ class ZipHandler {
);
}

await this.processZip(zipData, paying);
await this.processZip(zipData, paying, settings);
}

/**
 * Unpacks one archive, routes every entry through `handleFile`, and then
 * appends the HTML collected from converted images as an extra file.
 *
 * @param zipData - Raw bytes of the archive to unpack.
 * @param paying - Forwarded to the per-file handling and to the fallback.
 * @param settings - Forwarded to the per-file handling and HTML generation.
 * @throws Error when the number of processed nested zip files has reached
 *   `maxZipFiles`, and any unpacking error that the fallback does not handle.
 */
private async processZip(
  zipData: Uint8Array,
  paying: boolean,
  settings: Settings
) {
  if (this.zipFileCount >= this.maxZipFiles) {
    throw new Error('Too many zip files in the upload.');
  }

  try {
    const loadedZip = unzipSync(zipData, {
      // Entries ending in "/" are directories; only real files are kept.
      filter: (file) => !file.name.endsWith('/'),
    });

    for (const [name, file] of Object.entries(loadedZip)) {
      await this.handleFile(name, file, paying, settings);
    }

    this.addCombinedHTMLToFiles(paying, settings);
  } catch (error: unknown) {
    // handleZipError is async: it must be awaited so that a rethrown error
    // rejects this call (instead of becoming an unhandled rejection) and so
    // that fallback files are in `this.files` before the caller reads them.
    await this.handleZipError(error, zipData, paying);
  }
}

/**
 * Decides what to do with a single archive entry.
 *
 * - `__MACOSX/` entries and PDF files are ignored.
 * - Nested `.zip` entries are counted and processed recursively.
 * - HTML and Markdown entries are stored as decoded strings.
 * - Images are converted to HTML when the user is paying and the
 *   `imageQuizHtmlToAnki` setting is enabled.
 * - Anything else is stored with its raw bytes.
 */
private async handleFile(
  name: string,
  file: Uint8Array,
  paying: boolean,
  settings: Settings
) {
  const isIgnored = name.includes('__MACOSX/') || isPDFFile(name);
  if (isIgnored) {
    return;
  }

  if (name.endsWith('.zip')) {
    this.zipFileCount++;
    await this.processZip(file, paying, settings);
    return;
  }

  if (isHTMLFile(name) || isMarkdownFile(name)) {
    this.files.push({ name, contents: strFromU8(file) });
    return;
  }

  const shouldConvertImage =
    paying && settings.imageQuizHtmlToAnki && isImageFile(name);
  if (shouldConvertImage) {
    await this.convertAndAddImageToHTML(name, file);
    return;
  }

  this.files.push({ name, contents: file });
}

/**
 * Base64-encodes the image bytes, converts them with `convertImageToHTML`,
 * appends the result to `combinedHTML`, and logs the conversion.
 */
private async convertAndAddImageToHTML(name: string, file: Uint8Array) {
  const encodedImage = Buffer.from(file).toString('base64');
  const convertedHTML = await convertImageToHTML(encodedImage);
  this.combinedHTML = this.combinedHTML + convertedHTML;
  console.log('Converted image to HTML:', name, convertedHTML);
}

/**
 * Wraps the HTML collected from converted images in a minimal document and
 * adds it to `files` under a generated `ocr-<uuid>.html` name.
 *
 * Nothing is added when no HTML has been collected or the user is not
 * paying. The document title is `settings.deckName`, or 'Image Quiz' when
 * that is null or undefined.
 */
private addCombinedHTMLToFiles(paying: boolean, settings: Settings) {
  if (!this.combinedHTML || !paying) {
    return;
  }

  const finalHTML = `<!DOCTYPE html>
<html>
<head><title>${settings.deckName ?? 'Image Quiz'}</title></head>
<body>
${this.combinedHTML}
</body>
</html>`;
  this.files.push({
    name: `ocr-${getRandomUUID()}.html`,
    contents: finalHTML,
  });

  // This method runs once per processed archive, including nested ones.
  // Clear the buffer so already emitted HTML is not written again by the
  // enclosing archive, which would produce duplicate cards.
  this.combinedHTML = '';
}

/**
 * Reacts to a failure raised while unpacking an archive. An error whose
 * `code` is 13 triggers the fallback archive processing, and the files it
 * finds are appended to `files`; every other error is rethrown unchanged.
 */
private async handleZipError(
  error: unknown,
  zipData: Uint8Array,
  paying: boolean
) {
  const errorCode = (error as { code?: number }).code;
  if (errorCode !== 13) {
    throw error;
  }

  const recoveredFiles = await processAndPrepareArchiveData(zipData, paying);
  this.files.push(...recoveredFiles);
  console.log('Processed files using fallback method:', this.files);
}

Expand Down
2 changes: 1 addition & 1 deletion src/usecases/uploads/getPackagesFromZip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export const getPackagesFromZip = async (
return { packages: [] };
}

await zipHandler.build(fileContents as Uint8Array, paying);
await zipHandler.build(fileContents as Uint8Array, paying, settings);

const fileNames = zipHandler.getFileNames();

Expand Down
10 changes: 8 additions & 2 deletions src/usecases/uploads/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import Settings from '../../lib/parser/Settings';
import Package from '../../lib/parser/Package';
import fs from 'fs';
import { PrepareDeck } from '../../lib/parser/PrepareDeck';
import { isPotentialZipFile, isZIPFile } from '../../lib/storage/checks';
import {
isImageFile,
isPotentialZipFile,
isZIPFile,
} from '../../lib/storage/checks';
import { getPackagesFromZip } from './getPackagesFromZip';
import Workspace from '../../lib/parser/WorkSpace';
import { isZipContentFileSupported } from './isZipContentFileSupported';
Expand All @@ -28,7 +32,9 @@ function doGenerationWork(data: GenerationData) {
const filename = file.originalname;
const key = file.key;

if (isZipContentFileSupported(filename)) {
const allowImageQuizHtmlToAnki =
paying && settings.imageQuizHtmlToAnki && isImageFile(filename);
if (isZipContentFileSupported(filename) || allowImageQuizHtmlToAnki) {
const d = await PrepareDeck({
name: filename,
files: [{ name: filename, contents: fileContents }],
Expand Down

0 comments on commit bd8c86c

Please sign in to comment.