Skip to content

Commit

Permalink
fix(kb): File types
Browse files: browse the repository at this point in the history
  • Loading branch information
RezaRahemtola committed Sep 11, 2024
1 parent 2a35a2e commit 4ebb665
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
6 changes: 4 additions & 2 deletions src/utils/knowledge/embedding.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,8 +43,10 @@ const getTextSplitter = (
chunkOverlap: number,
): MarkdownTextSplitter | RecursiveCharacterTextSplitter => {
switch (fileType) {
case 'text/markdown':
return new MarkdownTextSplitter();
case 'markdown':
return RecursiveCharacterTextSplitter.fromLanguage(fileType);
case 'md':
return RecursiveCharacterTextSplitter.fromLanguage('markdown');
default:
return new RecursiveCharacterTextSplitter({
chunkSize,
Expand Down
12 changes: 6 additions & 6 deletions src/utils/knowledge/parsing.ts
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
import mime from 'mime';
import * as pdfjs from 'pdfjs-dist';
import { TextItem } from 'pdfjs-dist/types/src/display/api';

export const supportedInputFiles = ['.txt', '.md', '.pdf'].join(',');
export const supportedInputFiles = ['.txt', '.md', '.markdown', '.pdf'].join(',');

const extractTextFromPdfFile = async (file: File): Promise<string> => {
const pdfUrl = URL.createObjectURL(file);
Expand All @@ -27,15 +26,16 @@ const extractTextFromPdfFile = async (file: File): Promise<string> => {

export const extractFileContent = async (file: File): Promise<{ type: string; content: string }> => {
let extractedText = '';
const fileType = mime.getType(file.name) ?? file.type;
const fileType = file.name.split('.').pop();

try {
switch (fileType) {
case 'application/pdf':
case 'pdf':
extractedText = await extractTextFromPdfFile(file);
break;
case 'text/markdown':
case 'text/plain':
case 'markdown':
case 'md':
case 'txt':
extractedText = await new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onload = (event) => resolve(event.target!.result as string);
Expand Down

0 comments on commit 4ebb665

Please sign in to comment.