"node": ">=10"
}
},
+ "node_modules/marked": {
+ "version": "12.0.0",
+ "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.0.tgz",
+ "integrity": "sha512-Vkwtq9rLqXryZnWaQc86+FHLC6tr/fycMfYAhiOIXkrNmeGAyhSxjqu0Rs1i0bBqw5u0S7+lV9fdH2ZSVaoa0w==",
+ "bin": {
+ "marked": "bin/marked.js"
+ },
+ "engines": {
+ "node": ">= 18"
+ }
+ },
"node_modules/math.gl": {
"version": "3.6.3",
"resolved": "https://registry.npmjs.org/math.gl/-/math.gl-3.6.3.tgz",
diff --git a/package.json b/package.json
index efef4c7..edfe7eb 100644
--- a/package.json
+++ b/package.json
@@ -15,6 +15,7 @@
"bootstrap": "^5.3.2",
"codemirror": "^5.52.2",
"deck.gl": "^8.9.34",
+ "marked": "^12.0.0",
"pako": "^2.1.0",
"wasm-bhtsne": "^0.3.3"
},
diff --git a/src/js/semantic.js b/src/js/semantic.js
index c5fdd3e..4a09425 100644
--- a/src/js/semantic.js
+++ b/src/js/semantic.js
@@ -38,7 +38,6 @@ let loadResolve;
*/
let queryResolve;
-
function downloadFile(data, filename, mimeType) {
const blob = new Blob([data], { type: mimeType });
diff --git a/src/js/worker.js b/src/js/worker.js
index 893952c..24665d1 100644
--- a/src/js/worker.js
+++ b/src/js/worker.js
@@ -1,6 +1,8 @@
import { pipeline, AutoTokenizer } from '@xenova/transformers';
import pako from 'pako';
import init, { tSNE } from "wasm-bhtsne";
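+// marked renders the model's markdown replies as HTML before they are posted back to the main thread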
+import { marked } from 'marked';
+
init();
// env.useBrowserCache = false; // for testing
@@ -81,13 +83,12 @@ async function getTokens(text) {
async function chat(text, max_new_tokens = 100) {
return new Promise(async (resolve, reject) => {
// branch here for Qwen, since its tokenizer works differently
- console.log(chat_model_name);
+ console.log(chat_model_name, max_new_tokens);
if (chat_model_name.includes("Qwen")) {
try {
// Define the prompt and list of messages
-
const messages = [
{ "role": "system", "content": "You are a helpful assistant." },
{ "role": "user", "content": text }
@@ -95,7 +96,7 @@ async function chat(text, max_new_tokens = 100) {
const generatorText = chat_generator.tokenizer.apply_chat_template(messages, {
tokenize: false,
- add_generation_prompt: true,
+ add_generation_prompt: false, // don't append the template's empty assistant-turn prompt
});
const thisChat = await chat_generator(generatorText, {
@@ -103,14 +104,17 @@ async function chat(text, max_new_tokens = 100) {
do_sample: false,
callback_function: async function (beams) {
//const decodedText = await token_to_text(beams, chat_generator.tokenizer);
- //console.log(decodedText);
- console.log(beams)
+ let decodedText = chat_generator.tokenizer.decode(beams[0].output_token_ids, { skip_special_tokens: false });
+
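+ // the decoded text contains the whole Qwen chat template; the segment after the third
+ // <|im_start|> marker is assumed to be the assistant's reply, with its <|im_end|> tag stripped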
+ decodedText = decodedText.split("<|im_start|>")[3].replace("<|im_end|>", ""); // just return the model's output
+ decodedText = marked(decodedText); // render the markdown output as HTML
+
self.postMessage({
type: 'chat',
- chat_text: beams,
+ chat_text: decodedText
});
- resolve(beams); // Resolve the main promise with chat text
+ resolve(decodedText); // Resolve the main promise with chat text
},
});
} catch (error) {