From b961d83197d4c2b3909ebcf94dfe0361eac38e4b Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 14:12:52 +0100 Subject: [PATCH 01/11] feat(assignments-service): Add "ignore file" option for embeddings --- .../src/assignment/assignment.schema.ts | 6 +++ .../src/embedding/embedding.service.ts | 51 ++++++++++--------- services/package.json | 1 + services/pnpm-lock.yaml | 9 ++++ 4 files changed, 44 insertions(+), 23 deletions(-) diff --git a/services/apps/assignments/src/assignment/assignment.schema.ts b/services/apps/assignments/src/assignment/assignment.schema.ts index 343b424c8..cf32f86df 100644 --- a/services/apps/assignments/src/assignment/assignment.schema.ts +++ b/services/apps/assignments/src/assignment/assignment.schema.ts @@ -121,6 +121,12 @@ export class ClassroomInfo { @IsOptional() @IsBoolean() openaiConsent?: boolean; + + @Prop() + @ApiPropertyOptional() + @IsOptional() + @IsString() + openaiIgnore?: string; } @Schema() diff --git a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index 84206520f..f0546eafb 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -8,6 +8,8 @@ import {SolutionService} from "../solution/solution.service"; import {Assignment} from "../assignment/assignment.schema"; import {FilterQuery} from "mongoose"; import {Solution} from "../solution/solution.schema"; +// @ts-ignore +import * as ignore from 'ignore-file'; type DeclarationSnippet = Pick & { name: string }; @@ -98,29 +100,27 @@ export class EmbeddingService implements OnModuleInit { const assignmentId = assignment._id.toString(); const {solutions, documents} = await this.getDocuments(assignment); - const results = await Promise.all(documents - .filter(d => this.openaiService.isSupportedExtension(d.file)) - .map(async d => { - const functions = d.file.endsWith('.py') - ? 
this.getFunctions(d.content, PYTHON_FUNCTION_HEADER, findIndentEnd) - : this.getFunctions(d.content, CLIKE_FUNCTION_HEADER, findClosingBrace) - ; - const fileTotal = await Promise.all(functions.map(async ({line, name, text}) => { - const {tokens} = await this.upsert({ - id: `${d.solution}-${d.file}-${line}`, - assignment: assignmentId, - type: 'snippet', - solution: d.solution, - file: d.file, - line, - name, - text: `${d.file}\n\n${text}`, - embedding: [], - }, apiKey); - return tokens; - })); - return fileTotal.reduce((a, b) => a + b, 0); + const results = await Promise.all(documents.map(async d => { + const functions = d.file.endsWith('.py') + ? this.getFunctions(d.content, PYTHON_FUNCTION_HEADER, findIndentEnd) + : this.getFunctions(d.content, CLIKE_FUNCTION_HEADER, findClosingBrace) + ; + const fileTotal = await Promise.all(functions.map(async ({line, name, text}) => { + const {tokens} = await this.upsert({ + id: `${d.solution}-${d.file}-${line}`, + assignment: assignmentId, + type: 'snippet', + solution: d.solution, + file: d.file, + line, + name, + text: `${d.file}\n\n${text}`, + embedding: [], + }, apiKey); + return tokens; })); + return fileTotal.reduce((a, b) => a + b, 0); + })); const tokens = results.reduce((a, b) => a + b, 0); return this.createEstimate(solutions, documents, tokens); } @@ -136,9 +136,14 @@ export class EmbeddingService implements OnModuleInit { filter['consent.3P'] = true; } const solutionsWithConsent = await this.solutionService.findAll(filter, {projection: {_id: 1}}); + const allDocuments = await this.searchService.findAll(assignment._id.toString(), solutionsWithConsent.map(s => s.id)); + + const ignoreFn = assignment.classroom?.openaiIgnore ? 
ignore.compile(assignment.classroom.openaiIgnore) as (path: string) => boolean : undefined; + const documents = allDocuments.filter(d => this.openaiService.isSupportedExtension(d.file) && (!ignoreFn || !ignoreFn(d.file))); + return { solutions: solutionsWithConsent.length, - documents: await this.searchService.findAll(assignment._id.toString(), solutionsWithConsent.map(s => s.id)), + documents, }; } diff --git a/services/package.json b/services/package.json index e6354dc68..9c71b4302 100644 --- a/services/package.json +++ b/services/package.json @@ -48,6 +48,7 @@ "dockerode": "^3.3.5", "express": "^4.18.2", "glob-to-regexp": "^0.4.1", + "ignore-file": "^1.1.3", "jsdom": "^22.1.0", "mongoose": "^7.6.4", "multer": "1.4.5-lts.1", diff --git a/services/pnpm-lock.yaml b/services/pnpm-lock.yaml index 4a36a39ac..760928056 100644 --- a/services/pnpm-lock.yaml +++ b/services/pnpm-lock.yaml @@ -90,6 +90,9 @@ dependencies: glob-to-regexp: specifier: ^0.4.1 version: 0.4.1 + ignore-file: + specifier: ^1.1.3 + version: 1.1.3 jsdom: specifier: ^22.1.0 version: 22.1.0 @@ -4063,6 +4066,12 @@ packages: /ieee754@1.2.1: resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} + /ignore-file@1.1.3: + resolution: {integrity: sha512-PQL2H3ttelHPv6oeYfEQXBeArj4nTG4OHuv3Cpn21x19pnLIapkHAx7O0KzMWHc0ziw27vWS3nJiODhyaGEdyw==} + dependencies: + minimatch: 3.1.2 + dev: false + /ignore@5.2.4: resolution: {integrity: sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==} engines: {node: '>= 4'} From 4cf4913b3908a50f2553770f928c9bf515726456 Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 14:17:12 +0100 Subject: [PATCH 02/11] fix(assignments-service): Better embedding estimate --- .../src/embedding/embedding.controller.ts | 4 +--- .../src/embedding/embedding.service.ts | 18 ++++++------------ .../src/embedding/openai.service.ts | 13 ++----------- 3 files changed, 9 
insertions(+), 26 deletions(-) diff --git a/services/apps/assignments/src/embedding/embedding.controller.ts b/services/apps/assignments/src/embedding/embedding.controller.ts index 7518af89f..bee94043f 100644 --- a/services/apps/assignments/src/embedding/embedding.controller.ts +++ b/services/apps/assignments/src/embedding/embedding.controller.ts @@ -33,9 +33,7 @@ export class EmbeddingController { @Query('estimate', new ParseBoolPipe({optional: true})) estimate?: boolean, ): Promise { const assignment = await this.assignmentService.find(assignmentId) || notFound(assignmentId); - return estimate - ? this.embeddingService.estimateEmbeddings(assignment) - : this.embeddingService.createEmbeddings(assignment); + return this.embeddingService.createEmbeddings(assignment, estimate); } @Get('embeddings') diff --git a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index f0546eafb..0bb3b74ae 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -68,16 +68,6 @@ export class EmbeddingService implements OnModuleInit { }, undefined); } - async estimateEmbeddings(assignment: Assignment): Promise { - const {solutions, documents} = await this.getDocuments(assignment); - const tokens = this.openaiService.countTokens(documents.map(d => ({ - name: d.file, - content: d.content, - size: d.content.length - }))); - return this.createEstimate(solutions, documents, tokens); - } - getFunctions(file: string, headPattern: RegExp, findEnd: (code: string, headStart: number, headEnd: number) => number): DeclarationSnippet[] { const results: DeclarationSnippet[] = []; const lineStarts = this.searchService._buildLineStartList(file); @@ -92,7 +82,7 @@ export class EmbeddingService implements OnModuleInit { return results; } - async createEmbeddings(assignment: Assignment): Promise { + async createEmbeddings(assignment: Assignment, 
estimate = false): Promise { const apiKey = assignment.classroom?.openaiApiKey; if (!apiKey) { throw new ForbiddenException('No OpenAI API key configured for this assignment.'); @@ -106,6 +96,10 @@ export class EmbeddingService implements OnModuleInit { : this.getFunctions(d.content, CLIKE_FUNCTION_HEADER, findClosingBrace) ; const fileTotal = await Promise.all(functions.map(async ({line, name, text}) => { + const embeddableText = `${d.file}\n\n${text}`; + if (estimate) { + return this.openaiService.countTokens(embeddableText); + } const {tokens} = await this.upsert({ id: `${d.solution}-${d.file}-${line}`, assignment: assignmentId, @@ -114,7 +108,7 @@ export class EmbeddingService implements OnModuleInit { file: d.file, line, name, - text: `${d.file}\n\n${text}`, + text: embeddableText, embedding: [], }, apiKey); return tokens; diff --git a/services/apps/assignments/src/embedding/openai.service.ts b/services/apps/assignments/src/embedding/openai.service.ts index 3d6038986..d3e11ff33 100644 --- a/services/apps/assignments/src/embedding/openai.service.ts +++ b/services/apps/assignments/src/embedding/openai.service.ts @@ -14,17 +14,8 @@ export class OpenAIService implements OnModuleDestroy { this.enc.free(); } - countTokens(files: File[]): number { - let total = 0; - for (const file of files) { - if (!this.isSupportedExtension(file.name)) { - continue; - } - - const tokens = this.enc.encode(file.content.toString()).length; - total += tokens; - } - return total; + countTokens(text: string): number { + return this.enc.encode(text).length; } isSupportedExtension(filename: string) { From 305f6c031fc16cc612deae2ff48988316f92941b Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 14:19:10 +0100 Subject: [PATCH 03/11] refactor(assignments-service): Restructure EmbeddingService --- .../src/embedding/embedding.service.ts | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git 
a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index 0bb3b74ae..8589b5842 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -1,6 +1,6 @@ import {ForbiddenException, Injectable, OnModuleInit} from '@nestjs/common'; import {ElasticsearchService} from "@nestjs/elasticsearch"; -import {FileDocument, SearchService} from "../search/search.service"; +import {SearchService} from "../search/search.service"; import {Embeddable, EmbeddableSearch, EmbeddingEstimate, SnippetEmbeddable} from "./embedding.dto"; import {OpenAIService} from "./openai.service"; import {QueryDslQueryContainer} from "@elastic/elasticsearch/lib/api/types"; @@ -68,20 +68,6 @@ export class EmbeddingService implements OnModuleInit { }, undefined); } - getFunctions(file: string, headPattern: RegExp, findEnd: (code: string, headStart: number, headEnd: number) => number): DeclarationSnippet[] { - const results: DeclarationSnippet[] = []; - const lineStarts = this.searchService._buildLineStartList(file); - for (const match of file.matchAll(headPattern)) { - const name = match[1]; - const start = match.index!; - const {line, character: column} = this.searchService._findLocation(lineStarts, start); - const end = findEnd(file, start, start + match[0].length); - const text = file.substring(start - column, end + 1); - results.push({line, name, text}); - } - return results; - } - async createEmbeddings(assignment: Assignment, estimate = false): Promise { const apiKey = assignment.classroom?.openaiApiKey; if (!apiKey) { @@ -116,10 +102,6 @@ export class EmbeddingService implements OnModuleInit { return fileTotal.reduce((a, b) => a + b, 0); })); const tokens = results.reduce((a, b) => a + b, 0); - return this.createEstimate(solutions, documents, tokens); - } - - private createEstimate(solutions: number, documents: FileDocument[], tokens: number): 
EmbeddingEstimate { const estimatedCost = this.openaiService.estimateCost(tokens); return {solutions, files: documents.length, tokens, estimatedCost}; } @@ -141,6 +123,20 @@ export class EmbeddingService implements OnModuleInit { }; } + getFunctions(file: string, headPattern: RegExp, findEnd: (code: string, headStart: number, headEnd: number) => number): DeclarationSnippet[] { + const results: DeclarationSnippet[] = []; + const lineStarts = this.searchService._buildLineStartList(file); + for (const match of file.matchAll(headPattern)) { + const name = match[1]; + const start = match.index!; + const {line, character: column} = this.searchService._findLocation(lineStarts, start); + const end = findEnd(file, start, start + match[0].length); + const text = file.substring(start - column, end + 1); + results.push({line, name, text}); + } + return results; + } + async upsert(embeddable: Embeddable, apiKey: string): Promise<{ embeddable: Embeddable, tokens: number }> { const existing = await this.find(embeddable.id); if (existing && existing.text === embeddable.text) { From f086a1ec31ca9758b74c9872e311f1f70a2436ca Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 14:21:22 +0100 Subject: [PATCH 04/11] feat(assignments-service): Ignore individual methods using # --- .../apps/assignments/src/embedding/embedding.service.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index 8589b5842..6b8795e50 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -75,13 +75,17 @@ export class EmbeddingService implements OnModuleInit { } const assignmentId = assignment._id.toString(); - const {solutions, documents} = await this.getDocuments(assignment); + const {solutions, documents, ignoreFn} = await this.getDocuments(assignment); const 
results = await Promise.all(documents.map(async d => { const functions = d.file.endsWith('.py') ? this.getFunctions(d.content, PYTHON_FUNCTION_HEADER, findIndentEnd) : this.getFunctions(d.content, CLIKE_FUNCTION_HEADER, findClosingBrace) ; const fileTotal = await Promise.all(functions.map(async ({line, name, text}) => { + if (ignoreFn && ignoreFn(`${d.file}#${name}`)) { + return 0; + } + const embeddableText = `${d.file}\n\n${text}`; if (estimate) { return this.openaiService.countTokens(embeddableText); @@ -120,6 +124,7 @@ export class EmbeddingService implements OnModuleInit { return { solutions: solutionsWithConsent.length, documents, + ignoreFn, }; } From 79b0d84b9f66400af126df30326ccf1fade1e13d Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 14:32:14 +0100 Subject: [PATCH 05/11] test(assignments-service): Add ignore file tests --- .../src/embedding/embedding.service.spec.ts | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/services/apps/assignments/src/embedding/embedding.service.spec.ts b/services/apps/assignments/src/embedding/embedding.service.spec.ts index ce41aa430..63208d779 100644 --- a/services/apps/assignments/src/embedding/embedding.service.spec.ts +++ b/services/apps/assignments/src/embedding/embedding.service.spec.ts @@ -10,6 +10,8 @@ import {ElasticsearchService} from "@nestjs/elasticsearch"; import {SearchService} from "../search/search.service"; import {OpenAIService} from "./openai.service"; import {SolutionService} from "../solution/solution.service"; +// @ts-ignore +import * as ignore from 'ignore-file'; describe('EmbeddingService', () => { let service: EmbeddingService; @@ -119,3 +121,34 @@ def baz(): expect(findIndentEnd(code.trim(), 35, 45)).toEqual(51); }); }); + +describe('Ignore snippets', () => { + it('should ignore files', () => { + const ignoreFile = `\ + foo/ + !foo/Bar.java + `; + const ignoreFn = ignore.compile(ignoreFile) as (path: string) => boolean; + 
expect(ignoreFn('foo/Foo.java')).toEqual(true); + expect(ignoreFn('foo/Bar.java')).toEqual(false); + }); + + it('should ignore methods', () => { + const ignoreFile = `\ + Foo.java#* + !Foo.java#bar + + Bar.java + !Bar.java#baz + `; + const ignoreFn = ignore.compile(ignoreFile) as (path: string) => boolean; + // this is important, otherwise the documents will be pre-filtered + expect(ignoreFn('Foo.java')).toEqual(false); + expect(ignoreFn('Foo.java#bar')).toEqual(false); + expect(ignoreFn('Foo.java#baz')).toEqual(true); + + expect(ignoreFn('Bar.java')).toEqual(true); + expect(ignoreFn('Bar.java#bar')).toEqual(false); + expect(ignoreFn('Bar.java#baz')).toEqual(false); + }); +}); From eef7b445f3dbfae39bf6e83bb93321ab0d07b89a Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 14:54:18 +0100 Subject: [PATCH 06/11] feat(frontend): Add field for embedding ignore list --- .../src/app/assignment/model/assignment.ts | 1 + .../code-search/code-search.component.html | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/frontend/src/app/assignment/model/assignment.ts b/frontend/src/app/assignment/model/assignment.ts index 476b14b16..5d455cb25 100644 --- a/frontend/src/app/assignment/model/assignment.ts +++ b/frontend/src/app/assignment/model/assignment.ts @@ -11,6 +11,7 @@ export interface ClassroomInfo { mossResult?: string; openaiApiKey?: string; openaiConsent?: boolean; + openaiIgnore?: string; } export default class Assignment { diff --git a/frontend/src/app/assignment/modules/edit-assignment/code-search/code-search.component.html b/frontend/src/app/assignment/modules/edit-assignment/code-search/code-search.component.html index 666a04607..156cfe4ce 100644 --- a/frontend/src/app/assignment/modules/edit-assignment/code-search/code-search.component.html +++ b/frontend/src/app/assignment/modules/edit-assignment/code-search/code-search.component.html @@ -44,4 +44,40 @@ If disabled, the students' consent will be ignored and all submissions will be 
indexed. +
+ + +
+ A gitignore-like list of directories, files and methods to ignore when indexing code. +
+ More Info +
    +
  • + Supports wildcards, negation, and other minimatch patterns. +
  • +
  • + To ignore a file, use Foo.java. +
  • +
  • + To ignore a directory, use bar/. +
  • +
  • + To ignore a directory except for a specific file, use bar/ and !bar/Foo.java. +
  • +
  • + To ignore a method or function, use Foo.java#baz. +
  • +
  • + To ignore all methods and functions except for a specific one, use Foo.java#* and !Foo.java#baz. +
  • +
  • + If a file is already ignored, all method rules for that file, even allow rules, have no effect. +
  • +
+
+
+
} From d90273f010d45a8e6db47e475ebd6193861634df Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 16:26:18 +0100 Subject: [PATCH 07/11] refactor(assignments-service): Simplify embedding creation --- .../src/embedding/embedding.service.ts | 74 +++++++++---------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index 6b8795e50..0a437077b 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -1,6 +1,6 @@ import {ForbiddenException, Injectable, OnModuleInit} from '@nestjs/common'; import {ElasticsearchService} from "@nestjs/elasticsearch"; -import {SearchService} from "../search/search.service"; +import {FileDocument, SearchService} from "../search/search.service"; import {Embeddable, EmbeddableSearch, EmbeddingEstimate, SnippetEmbeddable} from "./embedding.dto"; import {OpenAIService} from "./openai.service"; import {QueryDslQueryContainer} from "@elastic/elasticsearch/lib/api/types"; @@ -11,7 +11,7 @@ import {Solution} from "../solution/solution.schema"; // @ts-ignore import * as ignore from 'ignore-file'; -type DeclarationSnippet = Pick & { name: string }; +type DeclarationSnippet = SnippetEmbeddable & { name: string }; @Injectable() export class EmbeddingService implements OnModuleInit { @@ -73,39 +73,26 @@ export class EmbeddingService implements OnModuleInit { if (!apiKey) { throw new ForbiddenException('No OpenAI API key configured for this assignment.'); } - const assignmentId = assignment._id.toString(); const {solutions, documents, ignoreFn} = await this.getDocuments(assignment); - const results = await Promise.all(documents.map(async d => { - const functions = d.file.endsWith('.py') - ? 
this.getFunctions(d.content, PYTHON_FUNCTION_HEADER, findIndentEnd) - : this.getFunctions(d.content, CLIKE_FUNCTION_HEADER, findClosingBrace) - ; - const fileTotal = await Promise.all(functions.map(async ({line, name, text}) => { - if (ignoreFn && ignoreFn(`${d.file}#${name}`)) { - return 0; - } + const functions = documents + .flatMap(d => d.file.endsWith('.py') + ? this.getFunctions(d, PYTHON_FUNCTION_HEADER, findIndentEnd) + : this.getFunctions(d, CLIKE_FUNCTION_HEADER, findClosingBrace) + ) + .filter(f => !ignoreFn || !ignoreFn(`${f.file}#${f.name}`)) + ; + + let tokens = 0; + if (estimate) { + for (const func of functions) { + tokens += this.openaiService.countTokens(func.text); + } + } else { + tokens = (await Promise.all(functions.map(async func => this.upsert(func, apiKey).then(({tokens}) => tokens)))) + .reduce((a, b) => a + b, 0); + } - const embeddableText = `${d.file}\n\n${text}`; - if (estimate) { - return this.openaiService.countTokens(embeddableText); - } - const {tokens} = await this.upsert({ - id: `${d.solution}-${d.file}-${line}`, - assignment: assignmentId, - type: 'snippet', - solution: d.solution, - file: d.file, - line, - name, - text: embeddableText, - embedding: [], - }, apiKey); - return tokens; - })); - return fileTotal.reduce((a, b) => a + b, 0); - })); - const tokens = results.reduce((a, b) => a + b, 0); const estimatedCost = this.openaiService.estimateCost(tokens); return {solutions, files: documents.length, tokens, estimatedCost}; } @@ -128,16 +115,27 @@ export class EmbeddingService implements OnModuleInit { }; } - getFunctions(file: string, headPattern: RegExp, findEnd: (code: string, headStart: number, headEnd: number) => number): DeclarationSnippet[] { + getFunctions(document: FileDocument, headPattern: RegExp, findEnd: (code: string, headStart: number, headEnd: number) => number): DeclarationSnippet[] { + const {content, file, solution, assignment} = document; const results: DeclarationSnippet[] = []; - const lineStarts = 
this.searchService._buildLineStartList(file); - for (const match of file.matchAll(headPattern)) { + const lineStarts = this.searchService._buildLineStartList(content); + for (const match of content.matchAll(headPattern)) { const name = match[1]; const start = match.index!; const {line, character: column} = this.searchService._findLocation(lineStarts, start); - const end = findEnd(file, start, start + match[0].length); - const text = file.substring(start - column, end + 1); - results.push({line, name, text}); + const end = findEnd(content, start, start + match[0].length); + const text = content.substring(start - column, end + 1); + results.push({ + id: `${solution}-${file}-${line}`, + type: 'snippet', + assignment, + solution, + file, + line, + name, + text, + embedding: [], + }); } return results; } From fee86d0923976021a35c53951ddba51427ac95ac Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 16:36:58 +0100 Subject: [PATCH 08/11] feat(assignments-service): Return list of imported functions and ignored files and functions --- .../src/embedding/embedding.dto.ts | 27 ++++++++++++- .../src/embedding/embedding.service.ts | 40 ++++++++++++++++--- 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/services/apps/assignments/src/embedding/embedding.dto.ts b/services/apps/assignments/src/embedding/embedding.dto.ts index 9804fc26b..95f9e4e44 100644 --- a/services/apps/assignments/src/embedding/embedding.dto.ts +++ b/services/apps/assignments/src/embedding/embedding.dto.ts @@ -1,5 +1,15 @@ import {ApiProperty, ApiPropertyOptional} from "@nestjs/swagger"; -import {ArrayMaxSize, ArrayMinSize, IsIn, IsInt, IsMongoId, IsNumber, IsOptional, IsString} from "class-validator"; +import { + ArrayMaxSize, + ArrayMinSize, + IsArray, + IsIn, + IsInt, + IsMongoId, + IsNumber, + IsOptional, + IsString +} from "class-validator"; export class EmbeddingEstimate { @ApiProperty() @@ -17,6 +27,21 @@ export class EmbeddingEstimate { @ApiProperty() 
@IsNumber({maxDecimalPlaces: 2}) estimatedCost: number; + + @ApiProperty() + @IsArray() + @IsString({each: true}) + functions: string[]; + + @ApiProperty() + @IsArray() + @IsString({each: true}) + ignoredFiles: string[]; + + @ApiProperty() + @IsArray() + @IsString({each: true}) + ignoredFunctions: string[]; } export class EmbeddableBase { diff --git a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index 0a437077b..b3142544f 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -74,14 +74,23 @@ export class EmbeddingService implements OnModuleInit { throw new ForbiddenException('No OpenAI API key configured for this assignment.'); } - const {solutions, documents, ignoreFn} = await this.getDocuments(assignment); - const functions = documents + const {solutions, documents, ignoreFn, ignoredFiles} = await this.getDocuments(assignment); + const ignoredFunctions = new Set(); + let functions = documents .flatMap(d => d.file.endsWith('.py') ? 
this.getFunctions(d, PYTHON_FUNCTION_HEADER, findIndentEnd) : this.getFunctions(d, CLIKE_FUNCTION_HEADER, findClosingBrace) ) - .filter(f => !ignoreFn || !ignoreFn(`${f.file}#${f.name}`)) ; + if (ignoreFn) { + functions = functions.filter(f => { + if (ignoreFn(`${f.file}#${f.name}`)) { /* documents were already filtered by file path in getDocuments, so test the file#method key to honour per-method rules such as Foo.java#bar */ + ignoredFunctions.add(f.id); + return false; + } + return true; + }); + } let tokens = 0; if (estimate) { @@ -93,8 +102,15 @@ export class EmbeddingService implements OnModuleInit { .reduce((a, b) => a + b, 0); } - const estimatedCost = this.openaiService.estimateCost(tokens); - return {solutions, files: documents.length, tokens, estimatedCost}; + return { + solutions, + files: documents.length, + tokens, + estimatedCost: this.openaiService.estimateCost(tokens), + functions: functions.map(f => `${f.file}#${f.name}`), + ignoredFiles: Array.from(ignoredFiles), + ignoredFunctions: Array.from(ignoredFunctions), + }; } private async getDocuments(assignment: Assignment) { @@ -106,12 +122,24 @@ export class EmbeddingService implements OnModuleInit { const allDocuments = await this.searchService.findAll(assignment._id.toString(), solutionsWithConsent.map(s => s.id)); const ignoreFn = assignment.classroom?.openaiIgnore ? 
ignore.compile(assignment.classroom.openaiIgnore) as (path: string) => boolean : undefined; - const documents = allDocuments.filter(d => this.openaiService.isSupportedExtension(d.file) && (!ignoreFn || !ignoreFn(d.file))); + const ignoredFiles = new Set(); + const documents = allDocuments.filter(d => { + if (!this.openaiService.isSupportedExtension(d.file)) { + ignoredFiles.add(d.file); + return false; + } + if (ignoreFn && ignoreFn(d.file)) { + ignoredFiles.add(d.file); + return false; + } + return true; + }); return { solutions: solutionsWithConsent.length, documents, ignoreFn, + ignoredFiles, }; } From df203d96fa9a67658c8d8a05f95e71ee11bf9903 Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Sat, 25 Nov 2023 17:02:56 +0100 Subject: [PATCH 09/11] feat(frontend): Show list of imported functions, ignored files and ignored functions --- frontend/src/app/assignment/model/solution.ts | 3 ++ .../import-embeddings.component.html | 54 ++++++++++++------- .../import-embeddings.component.ts | 11 ++-- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/frontend/src/app/assignment/model/solution.ts b/frontend/src/app/assignment/model/solution.ts index 3c8ae7d9d..db79a42b8 100644 --- a/frontend/src/app/assignment/model/solution.ts +++ b/frontend/src/app/assignment/model/solution.ts @@ -65,6 +65,9 @@ export interface EstimatedCosts { files: number; tokens: number; estimatedCost: number; + functions: string[]; + ignoredFiles: string[]; + ignoredFunctions: string[]; } export class Feedback { diff --git a/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.html b/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.html index 3a6a420cf..2380ebada 100644 --- a/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.html +++ b/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.html @@ -1,31 +1,45 @@

After importing from GitHub or Files, you can import embeddings.

-

- Here is an estimate of the token costs. - Actual costs may be lower if the embeddings are already cached, - or slightly higher due to some tokens like filenames being added to improve results. - Charges will be applied to your OpenAI account. -

-

- Please review the results below and click Import to confirm. -

-@if (estimatedCosts) { -
- - - - -
-} -@if (finalCosts) { +@if (costsAreFinal) {

The import has been completed. The following costs have been applied to your OpenAI account.

+} @else { +

+ Here is an estimate of the token costs. + Actual costs may be lower if the embeddings are already cached, + or slightly higher due to some tokens like filenames being added to improve results. + Charges will be applied to your OpenAI account. +

+

+ Please review the results below and click Import to confirm. +

+} +@if (costs) {
- - + + + + + +
+
+ +
+ @if (costs.ignoredFiles.length > 0) { +
+ + +
+ } + @if (costs.ignoredFunctions.length > 0) { +
+ + +
+ } } diff --git a/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.ts b/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.ts index 77e4c6535..9079a82ea 100644 --- a/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.ts +++ b/frontend/src/app/assignment/modules/import/import-embeddings/import-embeddings.component.ts @@ -10,8 +10,8 @@ import {EmbeddingService} from "../../../services/embedding.service"; styleUrls: ['./import-embeddings.component.scss'] }) export class ImportEmbeddingsComponent implements OnInit { - estimatedCosts?: EstimatedCosts; - finalCosts?: EstimatedCosts; + costs?: EstimatedCosts; + costsAreFinal = false; constructor( private embeddingService: EmbeddingService, @@ -22,13 +22,16 @@ export class ImportEmbeddingsComponent implements OnInit { ngOnInit() { this.route.params.pipe( switchMap(({aid}) => this.embeddingService.import(aid, true)), - ).subscribe(costs => this.estimatedCosts = costs); + ).subscribe(costs => this.costs = costs); } import() { const assignmentId = this.route.snapshot.params.aid; return this.embeddingService.import(assignmentId).pipe( - tap(result => this.finalCosts = result), + tap(result => { + this.costs = result; + this.costsAreFinal = true; + }), ); } } From b1ea9ee5658199d255d9f5e73b4f826276610251 Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Mon, 27 Nov 2023 12:36:08 +0100 Subject: [PATCH 10/11] fix(assignments-service): Add ignore-file types --- .../apps/assignments/src/embedding/embedding.service.spec.ts | 1 - services/apps/assignments/src/embedding/embedding.service.ts | 1 - services/tsconfig.json | 5 ++++- services/types/ignore-file.d.ts | 3 +++ 4 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 services/types/ignore-file.d.ts diff --git a/services/apps/assignments/src/embedding/embedding.service.spec.ts b/services/apps/assignments/src/embedding/embedding.service.spec.ts index 
63208d779..83f105591 100644 --- a/services/apps/assignments/src/embedding/embedding.service.spec.ts +++ b/services/apps/assignments/src/embedding/embedding.service.spec.ts @@ -10,7 +10,6 @@ import {ElasticsearchService} from "@nestjs/elasticsearch"; import {SearchService} from "../search/search.service"; import {OpenAIService} from "./openai.service"; import {SolutionService} from "../solution/solution.service"; -// @ts-ignore import * as ignore from 'ignore-file'; describe('EmbeddingService', () => { diff --git a/services/apps/assignments/src/embedding/embedding.service.ts b/services/apps/assignments/src/embedding/embedding.service.ts index b3142544f..77414c45d 100644 --- a/services/apps/assignments/src/embedding/embedding.service.ts +++ b/services/apps/assignments/src/embedding/embedding.service.ts @@ -8,7 +8,6 @@ import {SolutionService} from "../solution/solution.service"; import {Assignment} from "../assignment/assignment.schema"; import {FilterQuery} from "mongoose"; import {Solution} from "../solution/solution.schema"; -// @ts-ignore import * as ignore from 'ignore-file'; type DeclarationSnippet = SnippetEmbeddable & { name: string }; diff --git a/services/tsconfig.json b/services/tsconfig.json index 20fa4d850..e8402b7d9 100644 --- a/services/tsconfig.json +++ b/services/tsconfig.json @@ -16,6 +16,9 @@ "strict": true, "strictPropertyInitialization": false, "paths": { + "*": [ + "types/*" + ], "@app/keycloak-auth": [ "libs/keycloak-auth/src" ], @@ -42,4 +45,4 @@ ] } } -} \ No newline at end of file +} diff --git a/services/types/ignore-file.d.ts b/services/types/ignore-file.d.ts new file mode 100644 index 000000000..6d6a6a603 --- /dev/null +++ b/services/types/ignore-file.d.ts @@ -0,0 +1,3 @@ +declare module 'ignore-file' { + export function compile(patterns: string): (path: string) => boolean; +} From 132479a73f63f5415235e4853b8a3446a00a24d3 Mon Sep 17 00:00:00 2001 From: Adrian Kunz Date: Mon, 27 Nov 2023 12:39:39 +0100 Subject: [PATCH 11/11] 
test(assignments-service): Fix embedding.service.spec.ts --- .../src/embedding/embedding.service.spec.ts | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/services/apps/assignments/src/embedding/embedding.service.spec.ts b/services/apps/assignments/src/embedding/embedding.service.spec.ts index 83f105591..dd2676acf 100644 --- a/services/apps/assignments/src/embedding/embedding.service.spec.ts +++ b/services/apps/assignments/src/embedding/embedding.service.spec.ts @@ -7,7 +7,7 @@ import { PYTHON_FUNCTION_HEADER } from './embedding.service'; import {ElasticsearchService} from "@nestjs/elasticsearch"; -import {SearchService} from "../search/search.service"; +import {FileDocument, SearchService} from "../search/search.service"; import {OpenAIService} from "./openai.service"; import {SolutionService} from "../solution/solution.service"; import * as ignore from 'ignore-file'; @@ -42,9 +42,21 @@ class Foo { } } `; - - expect(service.getFunctions(code, CLIKE_FUNCTION_HEADER, findClosingBrace)).toEqual([ + const doc: FileDocument = { + assignment: 'a1', + solution: 's1', + file: 'Foo.java', + content: code, + }; + + expect(service.getFunctions(doc, CLIKE_FUNCTION_HEADER, findClosingBrace)).toEqual([ { + assignment: 'a1', + solution: 's1', + file: 'Foo.java', + id: 's1-Foo.java-1', + type: 'snippet', + embedding: [], name: 'bar', line: 1, text: `\ @@ -53,6 +65,12 @@ class Foo { }`, }, { + assignment: 'a1', + solution: 's1', + file: 'Foo.java', + id: 's1-Foo.java-5', + type: 'snippet', + embedding: [], name: 'baz', line: 5, text: `\ @@ -78,9 +96,21 @@ class Foo: if i != 0: i = i + 1 `; - - expect(service.getFunctions(code, PYTHON_FUNCTION_HEADER, findIndentEnd)).toEqual([ + const doc: FileDocument = { + assignment: 'a1', + solution: 's1', + file: 'Foo.py', + content: code, + }; + + expect(service.getFunctions(doc, PYTHON_FUNCTION_HEADER, findIndentEnd)).toEqual([ { + assignment: 'a1', + solution: 's1', + file: 'Foo.py', + id: 's1-Foo.py-1', 
+ type: 'snippet', + embedding: [], name: 'bar', line: 1, text: `\ @@ -92,6 +122,12 @@ class Foo: `, }, { + assignment: 'a1', + solution: 's1', + file: 'Foo.py', + id: 's1-Foo.py-6', + type: 'snippet', + embedding: [], name: 'baz', line: 6, text: `\