Skip to content

Commit

Permalink
update to libraries
Browse files Browse the repository at this point in the history
  • Loading branch information
Sanerix committed May 29, 2024
1 parent 226a0ca commit 8ba9b8e
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 140 deletions.
3 changes: 2 additions & 1 deletion config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ export const config = {
port: 3000,
enableHttps : false,
keyFile: "../keys/privkey.pem", // Only needed if enableHttps is true
certFile: "../keys/fullchain.pem" // Only needed if enableHttps is true
certFile: "../keys/fullchain.pem", // Only needed if enableHttps is true
currentVersion: 0.1
};
21 changes: 13 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,33 @@
"type": "module",
"scripts": {
"start": "tsc; node dist/start-server.js",
"clean": "rm -rf dist node_modules package-lock.json",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"@chainsafe/libp2p-noise": "^15.0.0",
"@chainsafe/libp2p-yamux": "^6.0.2",
"@decorators/di": "^3.1.0",
"@decorators/express": "^3.0.0",
"@helia/json": "^3.0.2",
"@helia/unixfs": "^3.0.2",
"@helia/dag-cbor": "^3.0.4",
"@helia/json": "^3.0.4",
"@helia/unixfs": "^3.0.6",
"@types/express": "^4.17.21",
"axios": "^1.6.8",
"axios": "^1.7.2",
"blockstore-level": "^1.1.8",
"cors": "^2.8.5",
"crypto-js": "^4.2.0",
"express": "^4.19.2",
"helia": "^4.1.0",
"kiss-framework": "^1.0.12",
"libp2p": "^1.3.0",
"helia": "^4.2.3",
"kiss-framework": "^1.0.65",
"libp2p": "^1.5.0",
"multiformats": "^13.1.0",
"pdf-parse-fork": "^1.2.0",
"ts-node": "^10.9.2",
"tslib": "^2.6.2",
"typescript": "^5.4.3",
"ws": "^8.16.0"
"typescript": "^5.4.5",
"ws": "^8.17.0"
}
}
28 changes: 28 additions & 0 deletions services/ollama.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import axios from "axios";
import { Service } from "kiss-framework";
import { base64 } from "multiformats/bases/base64";

/**
 * Record describing a file within a named collection.
 *
 * NOTE(review): this alias is not referenced anywhere in the visible code;
 * the meaning of `known_type` and `status` is not established here — confirm
 * against the producer of this data.
 */
type CollectionData = {
  collection_name: string;
  filename: string;
  known_type: boolean;
  status: boolean;
};

/**
 * Service that forwards text to an Ollama `/api/generate` endpoint using the
 * `extract-emails` model.
 */
@Service
export class OllamaProxyService {

  /**
   * Posts `pdfText` as the prompt of a non-streaming generate request and
   * returns the response body.
   *
   * @param pdfText - Text to send as the model prompt.
   * @returns The `data` payload of the HTTP response, unmodified.
   *   NOTE(review): the declared type is `string[]`, but the body is returned
   *   as-is and the caller in extract-emails.controller.ts reads a `response`
   *   property from it — confirm the real payload shape before tightening
   *   this signature.
   */
  async extractEmailsFromPdf(pdfText: string): Promise<string[]> {
    // Make a POST request to the Ollama API.
    const response = await axios.post('http://192.168.178.208:11434/api/generate', {
      model: 'extract-emails',
      prompt: pdfText,
      stream: false,
      // images: [base64PdfData] // Assuming the API accepts the PDF data as a field in the JSON payload
    });
    console.debug(`response.data:`, response.data);
    return response.data;
  }
}
13 changes: 12 additions & 1 deletion start-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,21 @@ import cors from "cors";
import { EventEmitter } from 'events';
import express, { Request, Response } from "express";
import { KissServer } from "kiss-framework";
import { tictoc } from "kiss-framework";
import { config } from "./config.js";


/**
 * Starts a simple wall-clock stopwatch.
 *
 * @param msg - Label captured at start time and printed when the timer stops.
 * @returns An object whose `toc` method logs the elapsed time in seconds,
 *   followed by the start label and an optional stop label, and returns the
 *   elapsed time in milliseconds.
 */
export function tictoc(msg = '') {
  const startedAt = Date.now();
  const label = msg;

  function toc(msg = '') {
    const elapsedMs = Date.now() - startedAt;
    // Output format: "<seconds>s <start label> <stop label>".
    console.log(`${elapsedMs / 1000.0}s ${label} ${msg}`);
    return elapsedMs;
  }

  return { toc };
}

const logger = (req: Request, res: Response, next: Function) => {
const tic = tictoc()
next();
Expand Down
6 changes: 4 additions & 2 deletions web/add.controller.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { json } from '@helia/json';
import { unixfs } from '@helia/unixfs';
import { Request } from 'express';
import { CURRENT_VERSION, Controller, Post, assert } from "kiss-framework";
import { Controller, Post, assert } from "kiss-framework";
import { PdfFile } from '../datatypes/pdf-file.js';
import { HeliaService } from "../services/helia.service.js";
import { config } from '../config.js';


@Controller
export class AddFileController {
Expand All @@ -23,7 +25,7 @@ export class AddFileController {

const content = await fs.addByteStream(<any>req);
const pdfFile = new PdfFile({
"protocol-version": CURRENT_VERSION,
"protocol-version": config.currentVersion,
name: filename,
ownerId,
content: content.toJSON()
Expand Down
60 changes: 60 additions & 0 deletions web/extract-emails.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { json } from '@helia/json';
import { Controller, Get, assert } from "kiss-framework";
import { OllamaProxyService } from "../services/ollama.service.js";
import { HeliaService } from '../services/helia.service.js';
import { base64 } from 'multiformats/bases/base64';
import { CID } from 'multiformats';
import { dagCbor } from '@helia/dag-cbor'
import { unixfs } from '@helia/unixfs';
import pdfParse from 'pdf-parse-fork';

/**
 * Performs a lightweight syntactic check for whether a value looks like a CID.
 *
 * Accepts either a string or an object carrying the string under the `/` key.
 * Two shapes are recognised:
 *  - a string starting with `b` followed by one or more alphanumerics
 *  - a string starting with `Qm` followed by exactly 44 alphanumerics
 *
 * This is only a basic pattern test and does not cover every CID encoding;
 * it does not decode or verify the multihash.
 *
 * @param link - Candidate CID string, or an object with the CID under `/`.
 * @returns `true` when the whole candidate matches one of the shapes above.
 */
export function isCid(link: string | { [key: string]: string }): boolean {
  // The alternation is wrapped in a group so that ^ and $ anchor BOTH
  // branches; without the group, "b…<garbage>" and "<garbage>Qm…" would pass.
  const cidRegex = /^(?:b[a-zA-Z0-9]+|Qm[a-zA-Z0-9]{44})$/;

  if (typeof link === 'string') {
    return cidRegex.test(link);
  }

  if (typeof link === 'object' && link !== null) {
    // Call hasOwnProperty via the prototype so an object that shadows or
    // lacks the method (e.g. Object.create(null)) cannot break the check.
    const hasSlashKey = Object.prototype.hasOwnProperty.call(link, '/');
    const candidate = hasSlashKey ? link['/'] : undefined;
    return typeof candidate === 'string' && cidRegex.test(candidate);
  }

  return false;
}

/**
 * Controller that loads a PDF by CID through Helia's UnixFS API, extracts its
 * text with `pdf-parse-fork`, and forwards that text to the Ollama proxy
 * service.
 */
@Controller
export class extractEmailsFromPdfController {

  private ollama = new OllamaProxyService()

  private helia = new HeliaService()

  /**
   * Reads the PDF identified by `pdf`, extracts its text, and asks the
   * Ollama service to process it.
   *
   * @param pdf - CID of the PDF, as a string; checked with `isCid` and then
   *   parsed with `CID.parse`.
   * @returns The `response` property of whatever the Ollama service returns.
   *   NOTE(review): the declared type is `string[]`, but nothing here
   *   validates the shape of that property — confirm against the API.
   * @throws Presumably when the `isCid` assertion fails or `CID.parse`
   *   rejects the input; errors from Helia, pdf parsing, or the HTTP call
   *   propagate unchanged.
   */
  @Get('pdf')
  async extractEmails(pdf: string): Promise<string[]> {
    console.debug(`extractEmails. pdf:`, pdf);
    assert(isCid(pdf), 'pdf is not a valid cid');
    console.debug(`pdf is a valid cid`);
    const heliaNode = this.helia.heliaNode
    const fs = unixfs(heliaNode)
    console.debug(`extractEmails. fs working`);
    const pdfCid = CID.parse(pdf)
    console.debug(`pdfCid:`, pdfCid);

    // Collect the chunks and join them once. Rebuilding a Uint8Array with
    // spread on every chunk copies all previous bytes each time (quadratic)
    // and materialises every byte as a JS array element.
    const chunks: Uint8Array[] = [];
    for await (const chunk of fs.cat(pdfCid)) {
      chunks.push(chunk);
    }
    const pdfBuffer = Buffer.concat(chunks);
    console.debug(`pdfData length:`, pdfBuffer.length);

    const data = await pdfParse(pdfBuffer);
    const textContent = data.text.trim();
    console.debug(`pdfInfo:`, textContent);
    const emails = await this.ollama.extractEmailsFromPdf(textContent);
    // Assume the API returns a list of emails in the response data
    return emails['response'];
  }

}
Loading

0 comments on commit 8ba9b8e

Please sign in to comment.